diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 9b0bace8bf..46abf8abae 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -353,7 +353,6 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\ vector_left_outer_join.q,\ vector_left_outer_join2.q,\ vector_leftsemi_mapjoin.q,\ - vector_mapjoin_reduce.q,\ vector_mr_diff_schema_alias.q,\ vector_multi_insert.q,\ vector_null_projection.q,\ @@ -621,6 +620,7 @@ minillaplocal.query.files=\ special_character_in_tabnames_1.q,\ sqlmerge.q,\ stats_based_fetch_decision.q,\ + subquery_in_having.q,\ subquery_notin.q,\ subquery_nested_subquery.q, \ subquery_select.q, \ @@ -680,6 +680,7 @@ minillaplocal.query.files=\ vector_join30.q,\ vector_join_filters.q,\ vector_leftsemi_mapjoin.q,\ + vector_mapjoin_reduce.q,\ vector_number_compare_projection.q,\ vector_partitioned_date_time.q,\ vector_ptf_part_simple.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java index 3a1897f4aa..c6a5ce261a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveSubQRemoveRelBuilder.java @@ -1109,10 +1109,8 @@ public RexNode field(RexNode e, String name) { return getRexBuilder().makeFieldAccess(e, name, false); } - /** Creates a {@link org.apache.calcite.rel.core.Join} with correlating - * variables. */ public HiveSubQRemoveRelBuilder join(JoinRelType joinType, RexNode condition, - Set variablesSet) { + Set variablesSet, boolean createSemiJoin) { Frame right = stack.pop(); final Frame left = stack.pop(); final RelNode join; @@ -1121,10 +1119,10 @@ public HiveSubQRemoveRelBuilder join(JoinRelType joinType, RexNode condition, if (correlate) { final CorrelationId id = Iterables.getOnlyElement(variablesSet); final ImmutableBitSet requiredColumns = - RelOptUtil.correlationColumns(id, right.rel); + RelOptUtil.correlationColumns(id, right.rel); if (!RelOptUtil.notContainsCorrelation(left.rel, id, Litmus.IGNORE)) { throw new IllegalArgumentException("variable " + id - + " must not be used by left input to correlation"); + + " must not be used by left input to correlation"); } switch (joinType) { case LEFT: @@ -1138,11 +1136,18 @@ public HiveSubQRemoveRelBuilder join(JoinRelType joinType, RexNode condition, default: postCondition = condition; } - join = correlateFactory.createCorrelate(left.rel, right.rel, id, - requiredColumns, SemiJoinType.of(joinType)); + if(createSemiJoin) { + join = correlateFactory.createCorrelate(left.rel, right.rel, id, + requiredColumns, SemiJoinType.SEMI); + } + else { + join = correlateFactory.createCorrelate(left.rel, right.rel, id, + requiredColumns, SemiJoinType.of(joinType)); + + } } else { join = joinFactory.createJoin(left.rel, right.rel, condition, - variablesSet, joinType, false); + variablesSet, joinType, false); } final List> pairs = new ArrayList<>(); pairs.addAll(left.right); @@ -1152,6 +1157,13 @@ public HiveSubQRemoveRelBuilder join(JoinRelType joinType, RexNode condition, return this; } + /** Creates a {@link org.apache.calcite.rel.core.Join} with correlating + * variables. */ + public HiveSubQRemoveRelBuilder join(JoinRelType joinType, RexNode condition, + Set variablesSet) { + return join(joinType, condition, variablesSet, false) ; + } + /** Creates a {@link org.apache.calcite.rel.core.Join} using USING syntax. * *

For each of the field names, both left and right inputs must have a diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java index 62125f0fb7..d1fe49c875 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java @@ -61,8 +61,10 @@ import org.apache.calcite.rel.logical.LogicalAggregate; import org.apache.calcite.rel.logical.LogicalCorrelate; import org.apache.calcite.rel.logical.LogicalFilter; +import org.apache.calcite.rel.logical.LogicalIntersect; import org.apache.calcite.rel.logical.LogicalJoin; import org.apache.calcite.rel.logical.LogicalProject; +import org.apache.calcite.rel.logical.LogicalUnion; import org.apache.calcite.rel.metadata.RelMdUtil; import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rel.rules.FilterJoinRule; @@ -77,10 +79,12 @@ import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexOver; import org.apache.calcite.rex.RexShuttle; import org.apache.calcite.rex.RexSubQuery; import org.apache.calcite.rex.RexUtil; import org.apache.calcite.rex.RexVisitorImpl; +import org.apache.calcite.sql.SemiJoinType; import org.apache.calcite.sql.SqlFunction; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlOperator; @@ -92,6 +96,7 @@ import org.apache.calcite.util.Bug; import org.apache.calcite.util.Holder; import org.apache.calcite.util.ImmutableBitSet; +import org.apache.calcite.util.ImmutableIntList; import org.apache.calcite.util.Litmus; import org.apache.calcite.util.Pair; import org.apache.calcite.util.ReflectUtil; @@ -99,12 +104,17 @@ import org.apache.calcite.util.Stacks; import org.apache.calcite.util.Util; import org.apache.calcite.util.mapping.Mappings; +import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelShuttleImpl; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIntersect; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -122,6 +132,8 @@ import com.google.common.collect.Sets; import com.google.common.collect.SortedSetMultimap; +import java.util.Stack; + /** * NOTE: this whole logic is replicated from Calcite's RelDecorrelator * and is exteneded to make it suitable for HIVE @@ -176,6 +188,8 @@ private final HashSet generatedCorRels = Sets.newHashSet(); + private Stack valueGen = new Stack(); + //~ Constructors ----------------------------------------------------------- private HiveRelDecorrelator ( @@ -262,6 +276,8 @@ private RelNode decorrelate(RelNode root) { return planner2.findBestExp(); } + assert(valueGen.isEmpty()); + return root; } @@ -319,8 +335,14 @@ public RelNode removeCorrelationViaRule(RelNode root) { return planner.findBestExp(); } + protected RexNode decorrelateExpr(RexNode exp, boolean valueGenerator) { + DecorrelateRexShuttle shuttle = new DecorrelateRexShuttle(); + shuttle.setValueGenerator(valueGenerator); + return exp.accept(shuttle); + } protected RexNode decorrelateExpr(RexNode exp) { DecorrelateRexShuttle shuttle = new DecorrelateRexShuttle(); + shuttle.setValueGenerator(true); return exp.accept(shuttle); } @@ -1106,7 +1128,11 @@ private Frame decorrelateInputWithValueGenerator(RelNode rel) { try { findCorrelationEquivalent(correlation, ((Filter) rel).getCondition()); } catch (Util.FoundOne e) { - map.put(def, (Integer) e.getNode()); + // we need to keep predicate kind e.g. EQUAL or NOT EQUAL + // so that later while decorrelating LogicalCorrelate appropriate join predicate + // is generated + def.setPredicateKind((SqlKind)((Pair)e.getNode()).getValue()); + map.put(def, (Integer)((Pair) e.getNode()).getKey()); } } // If all correlation variables are now satisfied, skip creating a value @@ -1145,16 +1171,23 @@ private Frame decorrelateInputWithValueGenerator(RelNode rel) { private void findCorrelationEquivalent(CorRef correlation, RexNode e) throws Util.FoundOne { switch (e.getKind()) { - case EQUALS: + // TODO: for now only EQUAL and NOT EQUAL corr predicates are optimized + //optimize rest of the predicates + case NOT_EQUALS: + if((boolean)valueGen.peek()) { + // we will need value generator + break; + } + case EQUALS: final RexCall call = (RexCall) e; final List operands = call.getOperands(); if (references(operands.get(0), correlation) && operands.get(1) instanceof RexInputRef) { - throw new Util.FoundOne(((RexInputRef) operands.get(1)).getIndex()); + throw new Util.FoundOne(Pair.of(((RexInputRef) operands.get(1)).getIndex(), e.getKind())); } if (references(operands.get(1), correlation) && operands.get(0) instanceof RexInputRef) { - throw new Util.FoundOne(((RexInputRef) operands.get(0)).getIndex()); + throw new Util.FoundOne(Pair.of(((RexInputRef) operands.get(0)).getIndex(), e.getKind())); } break; case AND: @@ -1223,17 +1256,38 @@ public Frame decorrelateRel(HiveFilter rel) throws SemanticException { return null; } + Frame oldInputFrame = frame; // If this LogicalFilter has correlated reference, create value generator // and produce the correlated variables in the new output. if (cm.mapRefRelToCorRef.containsKey(rel)) { frame = decorrelateInputWithValueGenerator(rel); } - // Replace the filter expression to reference output of the join - // Map filter to the new filter over join - relBuilder.push(frame.r).filter( - simplifyComparison(decorrelateExpr(rel.getCondition()))); + boolean valueGenerator = true; + if(frame.r == oldInputFrame.r) { + // this means correated value generator wasn't generated + valueGenerator = false; + } + if(oldInput instanceof LogicalCorrelate && ((LogicalCorrelate) oldInput).getJoinType() == SemiJoinType.SEMI + && !cm.mapRefRelToCorRef.containsKey(rel)) { + // this conditions need to be pushed into semi-join since this condition + // corresponds to IN + HiveSemiJoin join = ((HiveSemiJoin)frame.r); + final List conditions = new ArrayList<>(); + RexNode joinCond = join.getCondition(); + conditions.add(joinCond); + conditions.add(decorrelateExpr(rel.getCondition(), valueGenerator)); + final RexNode condition = + RexUtil.composeConjunction(rexBuilder, conditions, false); + RelNode newRel = HiveSemiJoin.getSemiJoin(frame.r.getCluster(), frame.r.getTraitSet(), join.getLeft(), join.getRight(), + condition,join.getLeftKeys(), join.getRightKeys()); + return register(rel, newRel, frame.oldToNewOutputs, frame.corDefOutputs); + } + // Replace the filter expression to reference output of the join + // Map filter to the new filter over join + relBuilder.push(frame.r).filter( + (decorrelateExpr(rel.getCondition(), valueGenerator))); // Filter does not change the input ordering. // Filter rel does not permute the input. // All corvars produced by filter will have the same output positions in the @@ -1243,39 +1297,6 @@ public Frame decorrelateRel(HiveFilter rel) throws SemanticException { } } - private RexNode simplifyComparison(RexNode op) { - switch(op.getKind()) { - case EQUALS: - case GREATER_THAN: - case GREATER_THAN_OR_EQUAL: - case LESS_THAN: - case LESS_THAN_OR_EQUAL: - case NOT_EQUALS: - RexCall e = (RexCall) op; - final List operands = new ArrayList<>(e.operands); - - // Simplify "x x" - final RexNode o0 = operands.get(0); - final RexNode o1 = operands.get(1); - // this should only be called when we are creating filter (decorrelate filter) - // since in that case null/unknown is treated as false we don't care about - // nullability of operands and will always rewrite op=op to op is not null - if (RexUtil.eq(o0, o1) ) - switch (e.getKind()) { - case EQUALS: - case GREATER_THAN_OR_EQUAL: - case LESS_THAN_OR_EQUAL: - // "x = x" simplifies to "x is not null" (similarly <= and >=) - return rexBuilder.makeCall(SqlStdOperatorTable.IS_NOT_NULL, o0); - default: - // "x != x" simplifies to "false" (similarly < and >) - return rexBuilder.makeLiteral(false); - } - } - return op; - } - - /** * Rewrite LogicalFilter. * @@ -1312,9 +1333,31 @@ public Frame decorrelateRel(LogicalFilter rel) { } + boolean valueGenerator = true; + if(frame.r == oldInput) { + // this means correated value generator wasn't generated + valueGenerator = false; + } + + if(oldInput instanceof LogicalCorrelate && ((LogicalCorrelate) oldInput).getJoinType() == SemiJoinType.SEMI + && !cm.mapRefRelToCorRef.containsKey(rel)) { + // this conditions need to be pushed into semi-join since this condition + // corresponds to IN + HiveSemiJoin join = ((HiveSemiJoin)frame.r); + final List conditions = new ArrayList<>(); + RexNode joinCond = join.getCondition(); + conditions.add(joinCond); + conditions.add(decorrelateExpr(rel.getCondition(), valueGenerator)); + final RexNode condition = + RexUtil.composeConjunction(rexBuilder, conditions, false); + RelNode newRel = HiveSemiJoin.getSemiJoin(frame.r.getCluster(), frame.r.getTraitSet(), join.getLeft(), join.getRight(), + condition,join.getLeftKeys(), join.getRightKeys()); + return register(rel, newRel, frame.oldToNewOutputs, frame.corDefOutputs); + } + // Replace the filter expression to reference output of the join // Map filter to the new filter over join - relBuilder.push(frame.r).filter(decorrelateExpr(rel.getCondition())); + relBuilder.push(frame.r).filter(decorrelateExpr(rel.getCondition(), valueGenerator)); // Filter does not change the input ordering. @@ -1344,6 +1387,9 @@ public Frame decorrelateRel(LogicalCorrelate rel) { final RelNode oldLeft = rel.getInput(0); final RelNode oldRight = rel.getInput(1); + boolean mightRequireValueGen = new findIfValueGenRequired().traverse(oldRight); + valueGen.push(mightRequireValueGen); + final Frame leftFrame = getInvoke(oldLeft, rel); final Frame rightFrame = getInvoke(oldRight, rel); @@ -1380,11 +1426,24 @@ public Frame decorrelateRel(LogicalCorrelate rel) { } final int newLeftPos = leftFrame.oldToNewOutputs.get(corDef.field); final int newRightPos = rightOutput.getValue(); - conditions.add( - rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, - RexInputRef.of(newLeftPos, newLeftOutput), - new RexInputRef(newLeftFieldCount + newRightPos, - newRightOutput.get(newRightPos).getType()))); + if(corDef.getPredicateKind() == SqlKind.NOT_EQUALS) { + conditions.add( + rexBuilder.makeCall(SqlStdOperatorTable.NOT_EQUALS, + RexInputRef.of(newLeftPos, newLeftOutput), + new RexInputRef(newLeftFieldCount + newRightPos, + newRightOutput.get(newRightPos).getType()))); + + } + else { + assert(corDef.getPredicateKind() == null + || corDef.getPredicateKind() == SqlKind.EQUALS); + conditions.add( + rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, + RexInputRef.of(newLeftPos, newLeftOutput), + new RexInputRef(newLeftFieldCount + newRightPos, + newRightOutput.get(newRightPos).getType()))); + + } // remove this cor var from output position mapping corDefOutputs.remove(corDef); @@ -1408,24 +1467,40 @@ public Frame decorrelateRel(LogicalCorrelate rel) { int oldLeftFieldCount = oldLeft.getRowType().getFieldCount(); int oldRightFieldCount = oldRight.getRowType().getFieldCount(); - assert rel.getRowType().getFieldCount() - == oldLeftFieldCount + oldRightFieldCount; // Left input positions are not changed. mapOldToNewOutputs.putAll(leftFrame.oldToNewOutputs); - // Right input positions are shifted by newLeftFieldCount. - for (int i = 0; i < oldRightFieldCount; i++) { - mapOldToNewOutputs.put( - i + oldLeftFieldCount, - rightFrame.oldToNewOutputs.get(i) + newLeftFieldCount); - } final RexNode condition = - RexUtil.composeConjunction(rexBuilder, conditions, false); - RelNode newJoin = - LogicalJoin.create(leftFrame.r, rightFrame.r, condition, - ImmutableSet.of(), rel.getJoinType().toJoinType()); + RexUtil.composeConjunction(rexBuilder, conditions, false); + RelNode newJoin = null; + + // this indicates original query was either correlated EXISTS or IN + if(rel.getJoinType() == SemiJoinType.SEMI) { + final List leftKeys = new ArrayList(); + final List rightKeys = new ArrayList(); + + RelNode[] inputRels = new RelNode[] { leftFrame.r, rightFrame.r}; + newJoin = HiveSemiJoin.getSemiJoin(rel.getCluster(), rel.getCluster().traitSetOf(HiveRelNode.CONVENTION), + leftFrame.r, rightFrame.r, condition, ImmutableIntList.copyOf(leftKeys), + ImmutableIntList.copyOf(rightKeys)); + + } + else { + // Right input positions are shifted by newLeftFieldCount. + for (int i = 0; i < oldRightFieldCount; i++) { + mapOldToNewOutputs.put( + i + oldLeftFieldCount, + rightFrame.oldToNewOutputs.get(i) + newLeftFieldCount); + } + + newJoin = LogicalJoin.create(leftFrame.r, rightFrame.r, condition, + ImmutableSet.of(), rel.getJoinType().toJoinType()); + + } + + valueGen.pop(); return register(rel, newJoin, mapOldToNewOutputs, corDefOutputs); } @@ -1545,6 +1620,7 @@ private RexInputRef getNewForOldInputRef(RexInputRef oldInputRef) { assert currentRel != null; int oldOrdinal = oldInputRef.getIndex(); + int oldOrdinalNo = oldOrdinal; int newOrdinal = 0; // determine which input rel oldOrdinal references, and adjust @@ -1563,6 +1639,17 @@ private RexInputRef getNewForOldInputRef(RexInputRef oldInputRef) { oldOrdinal -= n; } + if(oldInput == null) { + if(currentRel.getInputs().size() == 1 && currentRel.getInput(0) instanceof LogicalCorrelate) { + final Frame newFrame = map.get(currentRel.getInput(0)); + if(newFrame.r instanceof HiveSemiJoin) { + int oldFieldSize = currentRel.getInput(0).getRowType().getFieldCount(); + int newOrd = newFrame.r.getRowType().getFieldCount() + oldOrdinalNo - oldFieldSize; + return new RexInputRef(newOrd, oldInputRef.getType()); + } + } + } + assert oldInput != null; final Frame frame = map.get(oldInput); @@ -1819,7 +1906,66 @@ private static RelNode stripHep(RelNode rel) { /** Shuttle that decorrelates. */ private class DecorrelateRexShuttle extends RexShuttle { + private boolean valueGenerator; + public void setValueGenerator(boolean valueGenerator) { + this.valueGenerator = valueGenerator; + } + + // DecorrelateRexShuttle ends up decorrelating expressions cor.col1 <> $4 + // to $4=$4 if value generator is not generated, $4<>$4 is further simplified + // to false. This is wrong and messes up the whole tree. To prevent this visitCall + // is overridden to rewrite/simply such predicates to is not null. + // we also need to take care that we do this only for correlated predicates and + // not user specified explicit predicates + // TODO: This code should be removed once CALCITE-1851 is fixed and + // there is support of not equal + @Override public RexNode visitCall(final RexCall call) { + if(!valueGenerator) { + switch (call.getKind()) { + case EQUALS: + case NOT_EQUALS: + final List operands = new ArrayList<>(call.operands); + RexNode o0 = operands.get(0); + RexNode o1 = operands.get(1); + boolean isCorrelated = false; + if (o0 instanceof RexFieldAccess && (cm.mapFieldAccessToCorRef.get(o0) != null)) { + o0 = decorrFieldAccess((RexFieldAccess) o0); + isCorrelated = true; + + } + if (o1 instanceof RexFieldAccess && (cm.mapFieldAccessToCorRef.get(o1) != null)) { + o1 = decorrFieldAccess((RexFieldAccess) o1); + isCorrelated = true; + } + if (isCorrelated && RexUtil.eq(o0, o1)) { + return rexBuilder.makeCall(SqlStdOperatorTable.IS_NOT_NULL, o0); + } + + final List newOperands = new ArrayList<>(); + newOperands.add(o0); + newOperands.add(o1); + boolean[] update = { false }; + List clonedOperands = visitList(newOperands, update); + + return relBuilder.call(call.getOperator(), clonedOperands); + } + } + return super.visitCall(call); + } + @Override public RexNode visitFieldAccess(RexFieldAccess fieldAccess) { + return decorrFieldAccess(fieldAccess); + } + + @Override public RexNode visitInputRef(RexInputRef inputRef) { + final RexInputRef ref = getNewForOldInputRef(inputRef); + if (ref.getIndex() == inputRef.getIndex() + && ref.getType() == inputRef.getType()) { + return inputRef; // re-use old object, to prevent needless expr cloning + } + return ref; + } + private RexNode decorrFieldAccess(RexFieldAccess fieldAccess) { int newInputOutputOffset = 0; for (RelNode input : currentRel.getInputs()) { final Frame frame = map.get(input); @@ -1834,7 +1980,7 @@ private static RelNode stripHep(RelNode rel) { // This input rel does produce the cor var referenced. // Assume fieldAccess has the correct type info. return new RexInputRef(newInputPos + newInputOutputOffset, - frame.r.getRowType().getFieldList().get(newInputPos) + frame.r.getRowType().getFieldList().get(newInputPos) .getType()); } } @@ -1848,15 +1994,6 @@ private static RelNode stripHep(RelNode rel) { } return fieldAccess; } - - @Override public RexNode visitInputRef(RexInputRef inputRef) { - final RexInputRef ref = getNewForOldInputRef(inputRef); - if (ref.getIndex() == inputRef.getIndex() - && ref.getType() == inputRef.getType()) { - return inputRef; // re-use old object, to prevent needless expr cloning - } - return ref; - } } /** Shuttle that removes correlations. */ @@ -2881,10 +3018,12 @@ public CorDef def() { static class CorDef implements Comparable { public final CorrelationId corr; public final int field; + private SqlKind predicateKind; CorDef(CorrelationId corr, int field) { this.corr = corr; this.field = field; + this.predicateKind = null; } @Override public String toString() { @@ -2909,6 +3048,13 @@ public int compareTo(@Nonnull CorDef o) { } return Integer.compare(field, o.field); } + public SqlKind getPredicateKind() { + return predicateKind; + } + public void setPredicateKind(SqlKind predKind) { + this.predicateKind = predKind; + + } } /** A map of the locations of @@ -2986,6 +3132,107 @@ public boolean hasCorrelation() { } } + private static class findIfValueGenRequired extends HiveRelShuttleImpl { + private boolean mightRequireValueGen ; + findIfValueGenRequired() { this.mightRequireValueGen = true; } + + private boolean hasRexOver(List projects) { + for(RexNode expr : projects) { + if(expr instanceof RexOver) { + return true; + } + } + return false; + } + @Override public RelNode visit(HiveJoin rel) { + mightRequireValueGen = true; + return rel; + } + public RelNode visit(HiveSortLimit rel) { + mightRequireValueGen = true; + return rel; + } + public RelNode visit(HiveUnion rel) { + mightRequireValueGen = true; + return rel; + } + public RelNode visit(LogicalUnion rel) { + mightRequireValueGen = true; + return rel; + } + public RelNode visit(LogicalIntersect rel) { + mightRequireValueGen = true; + return rel; + } + + public RelNode visit(HiveIntersect rel) { + mightRequireValueGen = true; + return rel; + } + + @Override public RelNode visit(LogicalJoin rel) { + mightRequireValueGen = true; + return rel; + } + @Override public RelNode visit(HiveProject rel) { + if(!(hasRexOver(((HiveProject)rel).getProjects()))) { + mightRequireValueGen = false; + return super.visit(rel); + } + else { + mightRequireValueGen = true; + return rel; + } + } + @Override public RelNode visit(LogicalProject rel) { + if(!(hasRexOver(((LogicalProject)rel).getProjects()))) { + mightRequireValueGen = false; + return super.visit(rel); + } + else { + mightRequireValueGen = true; + return rel; + } + } + @Override public RelNode visit(HiveAggregate rel) { + // if there are aggregate functions or grouping sets we will need + // value generator + if((((HiveAggregate)rel).getAggCallList().isEmpty() == true + && ((HiveAggregate)rel).indicator == false)) { + this.mightRequireValueGen = false; + return super.visit(rel); + } + else { + // need to reset to true in case previous aggregate/project + // has set it to false + this.mightRequireValueGen = true; + return rel; + } + } + @Override public RelNode visit(LogicalAggregate rel) { + if((((LogicalAggregate)rel).getAggCallList().isEmpty() == true + && ((LogicalAggregate)rel).indicator == false)) { + this.mightRequireValueGen = false; + return super.visit(rel); + } + else { + // need to reset to true in case previous aggregate/project + // has set it to false + this.mightRequireValueGen = true; + return rel; + } + } + @Override public RelNode visit(LogicalCorrelate rel) { + // this means we are hitting nested subquery so don't + // need to go further + return rel; + } + + public boolean traverse(RelNode root) { + root.accept(this); + return mightRequireValueGen; + } + } /** Builds a {@link org.apache.calcite.sql2rel.RelDecorrelator.CorelMap}. */ private static class CorelMapBuilder extends HiveRelShuttleImpl { final SortedMap mapCorToCorRel = diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java index 2dca6a25ac..90aab6e2d2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java @@ -370,9 +370,23 @@ protected RexNode apply(RexSubQuery e, Set variablesSet, case TRUE: if (fields.isEmpty()) { builder.project(builder.alias(builder.literal(true), "i" + e.rel.getId())); - builder.aggregate(builder.groupKey(0)); + if(!variablesSet.isEmpty() && (e.getKind() == SqlKind.EXISTS || e.getKind() == SqlKind.IN)) { + // avoid adding group by for correlated IN/EXISTS queries + // since this is rewritting into semijoin + break; + } + else { + builder.aggregate(builder.groupKey(0)); + } } else { - builder.aggregate(builder.groupKey(fields)); + if(!variablesSet.isEmpty() && (e.getKind() == SqlKind.EXISTS || e.getKind() == SqlKind.IN)) { + // avoid adding group by for correlated IN/EXISTS queries + // since this is rewritting into semijoin + break; + } + else { + builder.aggregate(builder.groupKey(fields)); + } } break; default: @@ -389,7 +403,7 @@ protected RexNode apply(RexSubQuery e, Set variablesSet, } switch (logic) { case TRUE: - builder.join(JoinRelType.INNER, builder.and(conditions), variablesSet); + builder.join(JoinRelType.INNER, builder.and(conditions), variablesSet, true); return builder.literal(true); } builder.join(JoinRelType.LEFT, builder.and(conditions), variablesSet); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java index 5e8a994873..73fdff4dc6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java @@ -48,6 +48,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelColumnsAlignment; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; @@ -273,10 +274,12 @@ private static boolean validJoinParent(RelNode joinNode, RelNode parent) { // But we only need the additional project if the left child // is another join too; if it is not, ASTConverter will swap // the join inputs, leaving the join operator on the left. + // we also do it if parent is HiveSemiJoin since ASTConverter won't + // swap inputs then // This will help triggering multijoin recognition methods that // are embedded in SemanticAnalyzer. if (((Join) parent).getRight() == joinNode && - (((Join) parent).getLeft() instanceof Join) ) { + (((Join) parent).getLeft() instanceof Join || parent instanceof HiveSemiJoin) ) { validParent = false; } } else if (parent instanceof SetOp) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 80351bef87..0479b17cbd 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -1787,13 +1787,6 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv "Calcite: Prejoin ordering transformation, Push down limit through outer join"); } - // 5. Push Down Semi Joins - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, SemiJoinJoinTransposeRule.INSTANCE, - SemiJoinFilterTransposeRule.INSTANCE, SemiJoinProjectTransposeRule.INSTANCE); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, - "Calcite: Prejoin ordering transformation, Push Down Semi Joins"); - // 6. Apply Partition Pruning perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); basePlan = hepPlan(basePlan, false, mdProvider, executorProvider, new HivePartitionPruneRule(conf)); diff --git a/ql/src/test/queries/clientpositive/subquery_exists.q b/ql/src/test/queries/clientpositive/subquery_exists.q index 19c42f0c29..08681e58fe 100644 --- a/ql/src/test/queries/clientpositive/subquery_exists.q +++ b/ql/src/test/queries/clientpositive/subquery_exists.q @@ -89,8 +89,8 @@ drop table t; drop table if exists tx1; create table tx1 (a integer,b integer); insert into tx1 values (1, 1), - (1, 2), - (1, 3); + (1, 2), + (1, 3); select count(*) as result,3 as expected from tx1 u where exists (select * from tx1 v where u.a=v.a and u.b <> v.b); @@ -108,4 +108,4 @@ insert into t2 values(4,2),(4,3),(4,5); explain select * from t1 where t1.i in (select t2.i from t2 where t2.j <> t1.j); select * from t1 where t1.i in (select t2.i from t2 where t2.j <> t1.j); drop table t1; -drop table t2; +drop table t2; \ No newline at end of file diff --git a/ql/src/test/queries/clientpositive/subquery_in.q b/ql/src/test/queries/clientpositive/subquery_in.q index 4ba170a706..33cc2feb97 100644 --- a/ql/src/test/queries/clientpositive/subquery_in.q +++ b/ql/src/test/queries/clientpositive/subquery_in.q @@ -74,25 +74,47 @@ from part b where b.p_size in ; -- distinct, corr -explain -select * -from src b +explain +select * +from src b where b.key in - (select distinct a.key - from src a + (select distinct a.key + from src a where b.value = a.value and a.key > '9' ) ; -select * -from src b +select * +from src b where b.key in - (select distinct a.key - from src a + (select distinct a.key + from src a where b.value = a.value and a.key > '9' ) ; +-- corr, non equi predicate, should not have a join with outer to generate +-- corr values +explain +select * +from src b +where b.key in + (select distinct a.key + from src a + where b.value <> a.key and a.key > '9' + ) +; + +select * +from src b +where b.key in + (select distinct a.key + from src a + where b.value <> a.key and a.key > '9' + ) +; + + -- non agg, non corr, windowing select p_mfgr, p_name, p_size from part @@ -267,3 +289,13 @@ select * from t where i IN (select sum(i) from tt where tt.j = t.j); drop table t; drop table tt; + +-- since inner query has aggregate it will be joined with outer to get all possible corrrelated values +explain select * from part where p_size IN (select max(p_size) from part p where p.p_type <> part.p_name); +select * from part where p_size IN (select max(p_size) from part p where p.p_type <> part.p_name); + +-- inner query has join so should have a join with outer query to fetch all corr values +explain select * from part where p_size IN (select pp.p_size from part p join part pp on pp.p_type = p.p_type where part.p_type <> p.p_name); +select * from part where p_size IN (select pp.p_size from part p join part pp on pp.p_type = p.p_type where part.p_type <> p.p_name); + + diff --git a/ql/src/test/results/clientpositive/constprog_partitioner.q.out b/ql/src/test/results/clientpositive/constprog_partitioner.q.out index 87618df902..455c630e32 100644 --- a/ql/src/test/results/clientpositive/constprog_partitioner.q.out +++ b/ql/src/test/results/clientpositive/constprog_partitioner.q.out @@ -91,7 +91,7 @@ STAGE PLANS: alias: li Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (l_linenumber = 1) (type: boolean) + predicate: ((l_linenumber = 1) and l_orderkey is not null) (type: boolean) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int) @@ -107,10 +107,10 @@ STAGE PLANS: alias: lineitem Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((l_linenumber = l_linenumber) and (l_shipmode = 'AIR')) (type: boolean) + predicate: ((l_linenumber = 1) and (l_shipmode = 'AIR') and l_orderkey is not null) (type: boolean) Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: l_orderkey (type: int), l_linenumber (type: int) + expressions: l_orderkey (type: int), 1 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE Group By Operator diff --git a/ql/src/test/results/clientpositive/llap/constprog_semijoin.q.out b/ql/src/test/results/clientpositive/llap/constprog_semijoin.q.out index 998a5df264..7a486153d5 100644 --- a/ql/src/test/results/clientpositive/llap/constprog_semijoin.q.out +++ b/ql/src/test/results/clientpositive/llap/constprog_semijoin.q.out @@ -119,24 +119,26 @@ Stage-0 Select Operator [SEL_17] (rows=5 width=232) Output:["_col0","_col1","_col2"] Merge Join Operator [MERGEJOIN_28] (rows=5 width=232) - Conds:RS_14._col0=RS_15._col0(Inner),Output:["_col0","_col4"] + Conds:RS_14._col2=RS_15._col0(Left Semi),Output:["_col0","_col4"] <-Map 5 [SIMPLE_EDGE] llap SHUFFLE [RS_15] PartitionCols:_col0 - Select Operator [SEL_8] (rows=3 width=188) - Output:["_col0","_col1"] - Filter Operator [FIL_26] (rows=3 width=188) - predicate:id is not null - TableScan [TS_6] (rows=3 width=188) - default@table2,table2,Tbl:COMPLETE,Col:NONE,Output:["id","val2"] + Group By Operator [GBY_13] (rows=5 width=4) + Output:["_col0"],keys:_col0 + Select Operator [SEL_8] (rows=5 width=4) + Output:["_col0"] + Filter Operator [FIL_26] (rows=5 width=4) + predicate:id is not null + TableScan [TS_6] (rows=5 width=4) + default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"] <-Reducer 2 [SIMPLE_EDGE] llap SHUFFLE [RS_14] - PartitionCols:_col0 + PartitionCols:_col2 Merge Join Operator [MERGEJOIN_27] (rows=5 width=211) - Conds:RS_11._col2=RS_12._col0(Left Semi),Output:["_col0"] + Conds:RS_9._col0=RS_10._col0(Inner),Output:["_col0","_col2","_col4"] <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_11] - PartitionCols:_col2 + SHUFFLE [RS_9] + PartitionCols:_col0 Select Operator [SEL_2] (rows=5 width=192) Output:["_col0","_col2"] Filter Operator [FIL_24] (rows=5 width=192) @@ -144,16 +146,14 @@ Stage-0 TableScan [TS_0] (rows=10 width=192) default@table1,table1,Tbl:COMPLETE,Col:NONE,Output:["id","val","dimid"] <-Map 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_12] + SHUFFLE [RS_10] PartitionCols:_col0 - Group By Operator [GBY_10] (rows=5 width=4) - Output:["_col0"],keys:_col0 - Select Operator [SEL_5] (rows=5 width=4) - Output:["_col0"] - Filter Operator [FIL_25] (rows=5 width=4) - predicate:id is not null - TableScan [TS_3] (rows=5 width=4) - default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"] + Select Operator [SEL_5] (rows=3 width=188) + Output:["_col0","_col1"] + Filter Operator [FIL_25] (rows=3 width=188) + predicate:id is not null + TableScan [TS_3] (rows=3 width=188) + default@table2,table2,Tbl:COMPLETE,Col:NONE,Output:["id","val2"] PREHOOK: query: select table1.id, table1.val, table2.val2 from table1 inner join table2 on table1.val = 't1val01' and table1.id = table2.id left semi join table3 on table1.dimid = table3.id PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out index 87e08fbcde..e2e2ef3861 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_2.q.out @@ -73,14 +73,13 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 12 (BROADCAST_EDGE), Reducer 6 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) - Map 14 <- Reducer 13 (BROADCAST_EDGE) - Reducer 10 <- Map 14 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) - Reducer 11 <- Reducer 10 (SIMPLE_EDGE) - Reducer 12 <- Reducer 11 (CUSTOM_SIMPLE_EDGE) - Reducer 13 <- Map 9 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Reducer 11 (BROADCAST_EDGE), Reducer 6 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) + Map 13 <- Reducer 12 (BROADCAST_EDGE) + Reducer 10 <- Map 13 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) + Reducer 11 <- Reducer 10 (CUSTOM_SIMPLE_EDGE) + Reducer 12 <- Map 9 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 3 <- Reducer 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) Reducer 7 <- Map 5 (CUSTOM_SIMPLE_EDGE) @@ -91,10 +90,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - filterExpr: (bigint_col_7 is not null and decimal2016_col_26 is not null and tinyint_col_3 is not null and (decimal2016_col_26 BETWEEN DynamicValue(RS_21_t2_decimal2709_col_9_min) AND DynamicValue(RS_21_t2_decimal2709_col_9_max) and in_bloom_filter(decimal2016_col_26, DynamicValue(RS_21_t2_decimal2709_col_9_bloom_filter))) and (tinyint_col_3 BETWEEN DynamicValue(RS_21_t2_tinyint_col_20_min) AND DynamicValue(RS_21_t2_tinyint_col_20_max) and in_bloom_filter(tinyint_col_3, DynamicValue(RS_21_t2_tinyint_col_20_bloom_filter))) and (bigint_col_7 BETWEEN DynamicValue(RS_21_t2_tinyint_col_15_min) AND DynamicValue(RS_21_t2_tinyint_col_15_max) and in_bloom_filter(bigint_col_7, DynamicValue(RS_21_t2_tinyint_col_15_bloom_filter))) and (timestamp_col_9 BETWEEN DynamicValue(RS_24_tt2_timestamp_col_18_min) AND DynamicValue(RS_24_tt2_timestamp_col_18_max) and in_bloom_filter(timestamp_col_9, DynamicValue(RS_24_tt2_timestamp_col_18_bloom_filter)))) (type: boolean) + filterExpr: (bigint_col_7 is not null and decimal2016_col_26 is not null and tinyint_col_3 is not null and timestamp_col_9 is not null and (decimal2016_col_26 BETWEEN DynamicValue(RS_17_t2_decimal2709_col_9_min) AND DynamicValue(RS_17_t2_decimal2709_col_9_max) and in_bloom_filter(decimal2016_col_26, DynamicValue(RS_17_t2_decimal2709_col_9_bloom_filter))) and (tinyint_col_3 BETWEEN DynamicValue(RS_17_t2_tinyint_col_20_min) AND DynamicValue(RS_17_t2_tinyint_col_20_max) and in_bloom_filter(tinyint_col_3, DynamicValue(RS_17_t2_tinyint_col_20_bloom_filter))) and (bigint_col_7 BETWEEN DynamicValue(RS_17_t2_tinyint_col_15_min) AND DynamicValue(RS_17_t2_tinyint_col_15_max) and in_bloom_filter(bigint_col_7, DynamicValue(RS_17_t2_tinyint_col_15_bloom_filter))) and (timestamp_col_9 BETWEEN DynamicValue(RS_22_tt2_timestamp_col_18_min) AND DynamicValue(RS_22_tt2_timestamp_col_18_max) and in_bloom_filter(timestamp_col_9, DynamicValue(RS_22_tt2_timestamp_col_18_bloom_filter)))) (type: boolean) Statistics: Num rows: 1 Data size: 164 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((bigint_col_7 BETWEEN DynamicValue(RS_21_t2_tinyint_col_15_min) AND DynamicValue(RS_21_t2_tinyint_col_15_max) and in_bloom_filter(bigint_col_7, DynamicValue(RS_21_t2_tinyint_col_15_bloom_filter))) and (decimal2016_col_26 BETWEEN DynamicValue(RS_21_t2_decimal2709_col_9_min) AND DynamicValue(RS_21_t2_decimal2709_col_9_max) and in_bloom_filter(decimal2016_col_26, DynamicValue(RS_21_t2_decimal2709_col_9_bloom_filter))) and (timestamp_col_9 BETWEEN DynamicValue(RS_24_tt2_timestamp_col_18_min) AND DynamicValue(RS_24_tt2_timestamp_col_18_max) and in_bloom_filter(timestamp_col_9, DynamicValue(RS_24_tt2_timestamp_col_18_bloom_filter))) and (tinyint_col_3 BETWEEN DynamicValue(RS_21_t2_tinyint_col_20_min) AND DynamicValue(RS_21_t2_tinyint_col_20_max) and in_bloom_filter(tinyint_col_3, DynamicValue(RS_21_t2_tinyint_col_20_bloom_filter))) and bigint_col_7 is not null and decimal2016_col_26 is not null and tinyint_col_3 is not null) (type: boolean) + predicate: ((bigint_col_7 BETWEEN DynamicValue(RS_17_t2_tinyint_col_15_min) AND DynamicValue(RS_17_t2_tinyint_col_15_max) and in_bloom_filter(bigint_col_7, DynamicValue(RS_17_t2_tinyint_col_15_bloom_filter))) and (decimal2016_col_26 BETWEEN DynamicValue(RS_17_t2_decimal2709_col_9_min) AND DynamicValue(RS_17_t2_decimal2709_col_9_max) and in_bloom_filter(decimal2016_col_26, DynamicValue(RS_17_t2_decimal2709_col_9_bloom_filter))) and (timestamp_col_9 BETWEEN DynamicValue(RS_22_tt2_timestamp_col_18_min) AND DynamicValue(RS_22_tt2_timestamp_col_18_max) and in_bloom_filter(timestamp_col_9, DynamicValue(RS_22_tt2_timestamp_col_18_bloom_filter))) and (tinyint_col_3 BETWEEN DynamicValue(RS_17_t2_tinyint_col_20_min) AND DynamicValue(RS_17_t2_tinyint_col_20_max) and in_bloom_filter(tinyint_col_3, DynamicValue(RS_17_t2_tinyint_col_20_bloom_filter))) and bigint_col_7 is not null and decimal2016_col_26 is not null and timestamp_col_9 is not null and tinyint_col_3 is not null) (type: boolean) Statistics: Num rows: 1 Data size: 164 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: bigint_col_7 (type: bigint), decimal2016_col_26 (type: decimal(20,16)), tinyint_col_3 (type: tinyint), timestamp_col_9 (type: timestamp) @@ -108,7 +107,7 @@ STAGE PLANS: value expressions: _col3 (type: timestamp) Execution mode: llap LLAP IO: no inputs - Map 14 + Map 13 Map Operator Tree: TableScan alias: tt2 @@ -133,10 +132,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - filterExpr: (tinyint_col_15 is not null and decimal2709_col_9 is not null and tinyint_col_20 is not null) (type: boolean) + filterExpr: (tinyint_col_15 is not null and decimal2709_col_9 is not null and tinyint_col_20 is not null and smallint_col_19 is not null) (type: boolean) Statistics: Num rows: 1 Data size: 124 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (decimal2709_col_9 is not null and tinyint_col_15 is not null and tinyint_col_20 is not null) (type: boolean) + predicate: (decimal2709_col_9 is not null and smallint_col_19 is not null and tinyint_col_15 is not null and tinyint_col_20 is not null) (type: boolean) Statistics: Num rows: 1 Data size: 124 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: tinyint_col_15 (type: tinyint), decimal2709_col_9 (type: decimal(27,9)), tinyint_col_20 (type: tinyint), smallint_col_19 (type: smallint) @@ -233,47 +232,34 @@ STAGE PLANS: 1 _col0 (type: decimal(26,12)) outputColumnNames: _col2 Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col2 (type: timestamp) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: timestamp) - sort order: + - Map-reduce partition columns: _col0 (type: timestamp) - Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE - Reducer 11 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: timestamp) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: timestamp) - outputColumnNames: _col1 + expressions: _col2 (type: timestamp), -92 (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: timestamp), -92 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: timestamp), -92 (type: int) + Group By Operator + keys: _col0 (type: timestamp), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: timestamp) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: int) + Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: binary) - Reducer 12 + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: binary) + Reducer 11 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -285,7 +271,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: binary) - Reducer 13 + Reducer 12 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -318,10 +304,10 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col3 (type: timestamp), UDFToInteger(_col7) (type: int) - 1 _col1 (type: timestamp), -92 (type: int) + 1 _col0 (type: timestamp), _col1 (type: int) Statistics: Num rows: 1 Data size: 198 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() diff --git a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out index 6e55acf0d8..f03a65f248 100644 --- a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out +++ b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out @@ -1683,11 +1683,11 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_19] - Select Operator [SEL_18] (rows=434 width=178) + Select Operator [SEL_18] (rows=366 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_17] (rows=434 width=178) + Filter Operator [FIL_17] (rows=366 width=179) predicate:_col3 is null - Merge Join Operator [MERGEJOIN_22] (rows=500 width=178) + Merge Join Operator [MERGEJOIN_22] (rows=500 width=179) Conds:RS_14._col1=RS_15._col0(Left Outer),Output:["_col0","_col1","_col3"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_14] @@ -1699,21 +1699,21 @@ Stage-0 <-Reducer 3 [SIMPLE_EDGE] llap SHUFFLE [RS_15] PartitionCols:_col0 - Select Operator [SEL_13] (rows=41 width=95) + Select Operator [SEL_13] (rows=83 width=95) Output:["_col0","_col1"] - Group By Operator [GBY_12] (rows=41 width=91) + Group By Operator [GBY_12] (rows=83 width=91) Output:["_col0"],keys:_col1 - Select Operator [SEL_8] (rows=41 width=178) + Select Operator [SEL_8] (rows=83 width=178) Output:["_col1"] - Group By Operator [GBY_7] (rows=41 width=178) + Group By Operator [GBY_7] (rows=83 width=178) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_6] PartitionCols:_col0 - Group By Operator [GBY_5] (rows=41 width=178) + Group By Operator [GBY_5] (rows=83 width=178) Output:["_col0","_col1"],keys:value, key - Filter Operator [FIL_21] (rows=83 width=178) - predicate:((value = value) and (value > 'val_2')) + Filter Operator [FIL_21] (rows=166 width=178) + predicate:(value > 'val_2') Please refer to the previous TableScan [TS_0] PREHOOK: query: explain select * @@ -1747,11 +1747,11 @@ Stage-0 Stage-1 Reducer 3 llap File Output Operator [FS_18] - Select Operator [SEL_17] (rows=234 width=178) + Select Operator [SEL_17] (rows=183 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_16] (rows=234 width=178) + Filter Operator [FIL_16] (rows=183 width=179) predicate:_col4 is null - Merge Join Operator [MERGEJOIN_21] (rows=250 width=178) + Merge Join Operator [MERGEJOIN_21] (rows=250 width=179) Conds:RS_13._col0, _col1=RS_14._col0, _col1(Left Outer),Output:["_col0","_col1","_col4"] <-Reducer 2 [ONE_TO_ONE_EDGE] llap FORWARD [RS_13] @@ -1770,17 +1770,17 @@ Stage-0 <-Reducer 4 [ONE_TO_ONE_EDGE] llap FORWARD [RS_14] PartitionCols:_col0, _col1 - Select Operator [SEL_12] (rows=20 width=182) + Select Operator [SEL_12] (rows=83 width=182) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_11] (rows=20 width=178) + Group By Operator [GBY_11] (rows=83 width=178) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_10] PartitionCols:_col0, _col1 - Group By Operator [GBY_9] (rows=20 width=178) + Group By Operator [GBY_9] (rows=83 width=178) Output:["_col0","_col1"],keys:key, value - Filter Operator [FIL_20] (rows=41 width=178) - predicate:((key = key) and (value = value) and (value > 'val_12')) + Filter Operator [FIL_20] (rows=166 width=178) + predicate:((value > 'val_12') and key is not null) Please refer to the previous TableScan [TS_0] PREHOOK: query: create view cv1 as @@ -1821,26 +1821,28 @@ Stage-0 limit:-1 Stage-1 Reducer 2 llap - File Output Operator [FS_11] - Merge Join Operator [MERGEJOIN_16] (rows=32 width=178) - Conds:RS_7._col0, _col1=RS_8._col0, _col1(Left Semi),Output:["_col0","_col1"] + File Output Operator [FS_12] + Merge Join Operator [MERGEJOIN_17] (rows=133 width=178) + Conds:RS_8._col0, _col1=RS_9._col0, _col1(Left Semi),Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_7] + SHUFFLE [RS_8] PartitionCols:_col0, _col1 - Select Operator [SEL_1] (rows=500 width=178) + Select Operator [SEL_2] (rows=166 width=178) Output:["_col0","_col1"] - TableScan [TS_0] (rows=500 width=178) - default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"],properties:{"insideView":"TRUE"} + Filter Operator [FIL_15] (rows=166 width=178) + predicate:((value > 'val_9') and key is not null) + TableScan [TS_0] (rows=500 width=178) + default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"],properties:{"insideView":"TRUE"} <-Map 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_8] + SHUFFLE [RS_9] PartitionCols:_col0, _col1 - Group By Operator [GBY_6] (rows=20 width=178) + Group By Operator [GBY_7] (rows=83 width=178) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_4] (rows=41 width=178) + Select Operator [SEL_5] (rows=166 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_15] (rows=41 width=178) - predicate:((key = key) and (value = value) and (value > 'val_9')) - TableScan [TS_2] (rows=500 width=178) + Filter Operator [FIL_16] (rows=166 width=178) + predicate:((value > 'val_9') and key is not null) + TableScan [TS_3] (rows=500 width=178) default@src_cbo,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] PREHOOK: query: explain select * @@ -1871,26 +1873,28 @@ Stage-0 limit:-1 Stage-1 Reducer 2 llap - File Output Operator [FS_11] - Merge Join Operator [MERGEJOIN_16] (rows=32 width=178) - Conds:RS_7._col0, _col1=RS_8._col0, _col1(Left Semi),Output:["_col0","_col1"] + File Output Operator [FS_12] + Merge Join Operator [MERGEJOIN_17] (rows=133 width=178) + Conds:RS_8._col0, _col1=RS_9._col0, _col1(Left Semi),Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_7] + SHUFFLE [RS_8] PartitionCols:_col0, _col1 - Select Operator [SEL_1] (rows=500 width=178) + Select Operator [SEL_2] (rows=166 width=178) Output:["_col0","_col1"] - TableScan [TS_0] (rows=500 width=178) - default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + Filter Operator [FIL_15] (rows=166 width=178) + predicate:((value > 'val_9') and key is not null) + TableScan [TS_0] (rows=500 width=178) + default@src_cbo,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] <-Map 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_8] + SHUFFLE [RS_9] PartitionCols:_col0, _col1 - Group By Operator [GBY_6] (rows=20 width=178) + Group By Operator [GBY_7] (rows=83 width=178) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_4] (rows=41 width=178) + Select Operator [SEL_5] (rows=166 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_15] (rows=41 width=178) - predicate:((key = key) and (value = value) and (value > 'val_9')) - TableScan [TS_2] (rows=500 width=178) + Filter Operator [FIL_16] (rows=166 width=178) + predicate:((value > 'val_9') and key is not null) + TableScan [TS_3] (rows=500 width=178) default@src_cbo,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] PREHOOK: query: explain select * @@ -1958,19 +1962,19 @@ Stage-0 Stage-1 Reducer 4 llap File Output Operator [FS_22] - Select Operator [SEL_21] (rows=7 width=8) + Select Operator [SEL_21] (rows=3 width=8) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_32] (rows=7 width=8) + Merge Join Operator [MERGEJOIN_32] (rows=3 width=8) Conds:RS_18._col1, _col4=RS_19._col0, _col1(Left Semi),Output:["_col0","_col3"] <-Map 6 [SIMPLE_EDGE] llap SHUFFLE [RS_19] PartitionCols:_col0, _col1 - Group By Operator [GBY_17] (rows=2 width=8) + Group By Operator [GBY_17] (rows=1 width=8) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_12] (rows=7 width=8) + Select Operator [SEL_12] (rows=2 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_30] (rows=7 width=96) - predicate:((l_linenumber = l_linenumber) and (l_shipmode = 'AIR')) + Filter Operator [FIL_30] (rows=2 width=96) + predicate:((l_linenumber = 1) and (l_shipmode = 'AIR') and l_orderkey is not null) TableScan [TS_10] (rows=100 width=96) default@lineitem,lineitem,Tbl:COMPLETE,Col:COMPLETE,Output:["l_orderkey","l_linenumber","l_shipmode"] <-Reducer 3 [SIMPLE_EDGE] llap @@ -1984,7 +1988,7 @@ Stage-0 Select Operator [SEL_9] (rows=14 width=16) Output:["_col0","_col1","_col2","_col3"] Filter Operator [FIL_29] (rows=14 width=16) - predicate:((l_linenumber = 1) and l_partkey is not null) + predicate:((l_linenumber = 1) and l_orderkey is not null and l_partkey is not null) TableScan [TS_7] (rows=100 width=16) default@lineitem,li,Tbl:COMPLETE,Col:COMPLETE,Output:["l_orderkey","l_partkey","l_suppkey","l_linenumber"] <-Reducer 2 [ONE_TO_ONE_EDGE] llap @@ -2258,14 +2262,14 @@ Stage-0 File Output Operator [FS_26] Select Operator [SEL_25] (rows=13 width=223) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_24] (rows=13 width=227) + Filter Operator [FIL_24] (rows=13 width=231) predicate:(not CASE WHEN ((_col4 = 0)) THEN (false) WHEN (_col4 is null) THEN (false) WHEN (_col8 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col5 < _col4)) THEN (true) ELSE (false) END) - Merge Join Operator [MERGEJOIN_32] (rows=26 width=227) + Merge Join Operator [MERGEJOIN_32] (rows=26 width=230) Conds:RS_21._col0, _col1=RS_22._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5","_col8"] <-Reducer 2 [SIMPLE_EDGE] llap SHUFFLE [RS_21] PartitionCols:_col0, _col1 - Merge Join Operator [MERGEJOIN_31] (rows=26 width=226) + Merge Join Operator [MERGEJOIN_31] (rows=26 width=229) Conds:RS_18._col1=RS_19._col0(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5"] <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_18] @@ -2277,36 +2281,36 @@ Stage-0 <-Reducer 4 [ONE_TO_ONE_EDGE] llap FORWARD [RS_19] PartitionCols:_col0 - Group By Operator [GBY_7] (rows=1 width=114) + Group By Operator [GBY_7] (rows=2 width=114) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_6] PartitionCols:_col0 - Group By Operator [GBY_5] (rows=1 width=114) + Group By Operator [GBY_5] (rows=2 width=114) Output:["_col0","_col1","_col2"],aggregations:["count()","count(p_name)"],keys:p_mfgr - Select Operator [SEL_4] (rows=4 width=223) + Select Operator [SEL_4] (rows=8 width=223) Output:["p_name","p_mfgr"] - Filter Operator [FIL_29] (rows=4 width=223) - predicate:((p_mfgr = p_mfgr) and (p_size < 10)) + Filter Operator [FIL_29] (rows=8 width=223) + predicate:((p_size < 10) and p_mfgr is not null) Please refer to the previous TableScan [TS_0] <-Reducer 5 [ONE_TO_ONE_EDGE] llap FORWARD [RS_22] PartitionCols:_col0, _col1 - Select Operator [SEL_17] (rows=2 width=223) + Select Operator [SEL_17] (rows=4 width=223) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_16] (rows=2 width=219) + Filter Operator [FIL_16] (rows=4 width=219) predicate:_col0 is not null - Group By Operator [GBY_14] (rows=2 width=219) + Group By Operator [GBY_14] (rows=4 width=219) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_13] PartitionCols:_col0, _col1 - Group By Operator [GBY_12] (rows=2 width=219) + Group By Operator [GBY_12] (rows=4 width=219) Output:["_col0","_col1"],keys:p_name, p_mfgr - Select Operator [SEL_11] (rows=4 width=223) + Select Operator [SEL_11] (rows=8 width=223) Output:["p_name","p_mfgr"] - Filter Operator [FIL_30] (rows=4 width=223) - predicate:((p_mfgr = p_mfgr) and (p_size < 10)) + Filter Operator [FIL_30] (rows=8 width=223) + predicate:((p_size < 10) and p_mfgr is not null) Please refer to the previous TableScan [TS_0] PREHOOK: query: explain select p_name, p_size @@ -2465,7 +2469,7 @@ Stage-0 Select Operator [SEL_12] (rows=1 width=114) Output:["_col0","_col1"] Filter Operator [FIL_40] (rows=1 width=114) - predicate:(((_col2 - _col1) > 600.0) and (_col1 = _col1)) + predicate:(((_col2 - _col1) > 600.0) and _col1 is not null) Group By Operator [GBY_10] (rows=5 width=114) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)"],keys:KEY._col0 <-Map 1 [SIMPLE_EDGE] llap @@ -2484,7 +2488,7 @@ Stage-0 Select Operator [SEL_24] (rows=1 width=110) Output:["_col0","_col1"] Filter Operator [FIL_41] (rows=1 width=114) - predicate:(((_col2 - _col1) > 600.0) and (_col1 = _col1)) + predicate:(((_col2 - _col1) > 600.0) and _col1 is not null) Group By Operator [GBY_22] (rows=5 width=114) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)"],keys:KEY._col0 <-Map 1 [SIMPLE_EDGE] llap diff --git a/ql/src/test/results/clientpositive/llap/subquery_exists.q.out b/ql/src/test/results/clientpositive/llap/subquery_exists.q.out index e206f0851e..dfe424046e 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_exists.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_exists.q.out @@ -33,15 +33,18 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((value > 'val_9') and key is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 3 @@ -50,22 +53,22 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key = key) and (value = value) and (value > 'val_9')) (type: boolean) - Statistics: Num rows: 41 Data size: 7298 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((value > 'val_9') and key is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 41 Data size: 7298 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 3560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 20 Data size: 3560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -78,10 +81,10 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 32 Data size: 5696 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 133 Data size: 23674 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 32 Data size: 5696 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 133 Data size: 23674 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -237,16 +240,19 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -1074,13 +1080,13 @@ POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@tx1 PREHOOK: query: insert into tx1 values (1, 1), - (1, 2), - (1, 3) + (1, 2), + (1, 3) PREHOOK: type: QUERY PREHOOK: Output: default@tx1 POSTHOOK: query: insert into tx1 values (1, 1), - (1, 2), - (1, 3) + (1, 2), + (1, 3) POSTHOOK: type: QUERY POSTHOOK: Output: default@tx1 POSTHOOK: Lineage: tx1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] @@ -1111,10 +1117,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) - Reducer 5 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1122,31 +1126,44 @@ STAGE PLANS: TableScan alias: u Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: a (type: int), b (type: int) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: a is not null (type: boolean) Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Select Operator + expressions: a (type: int), b (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) - Group By Operator - keys: a (type: int), b (type: int) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: v + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (a is not null and b is not null) (type: boolean) Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Select Operator + expressions: a (type: int), b (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1156,18 +1173,22 @@ STAGE PLANS: condition map: Left Semi Join 0 to 1 keys: - 0 _col0 (type: int), _col1 (type: int) - 1 _col0 (type: int), _col1 (type: int) + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3 + residual filter predicates: {(_col1 <> _col3)} Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Select Operator + Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -1187,46 +1208,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3 - residual filter predicates: {(_col3 <> _col1)} - Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) Stage: Stage-0 Fetch Operator @@ -1274,7 +1255,6 @@ POSTHOOK: type: QUERY POSTHOOK: Output: default@t2 POSTHOOK: Lineage: t2.i EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: t2.j EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -Warning: Shuffle Join MERGEJOIN[27][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product PREHOOK: query: explain select * from t1 where t1.i in (select t2.i from t2 where t2.j <> t1.j) PREHOOK: type: QUERY POSTHOOK: query: explain select * from t1 where t1.i in (select t2.i from t2 where t2.j <> t1.j) @@ -1288,9 +1268,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (XPROD_EDGE), Reducer 6 (XPROD_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1298,15 +1276,19 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i (type: int), j (type: int) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: i is not null (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Select Operator + expressions: i (type: int), j (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Execution mode: llap LLAP IO: no inputs Map 3 @@ -1314,31 +1296,24 @@ STAGE PLANS: TableScan alias: t2 Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i (type: int), j (type: int) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: (i is not null and j is not null) (type: boolean) Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Select Operator + expressions: i (type: int), j (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int) - Execution mode: llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: j (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1348,55 +1323,22 @@ STAGE PLANS: condition map: Left Semi Join 0 to 1 keys: - 0 _col0 (type: int), _col1 (type: int) - 1 _col0 (type: int), _col1 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 42 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 42 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2 - residual filter predicates: {(_col1 <> _col2)} - Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + residual filter predicates: {(_col1 <> _col3)} + Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col2 (type: int) + expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) + Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1404,7 +1346,6 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[27][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product PREHOOK: query: select * from t1 where t1.i in (select t2.i from t2 where t2.j <> t1.j) PREHOOK: type: QUERY PREHOOK: Input: default@t1 diff --git a/ql/src/test/results/clientpositive/llap/subquery_in.q.out b/ql/src/test/results/clientpositive/llap/subquery_in.q.out index af42131bc2..c7b98d3967 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_in.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_in.q.out @@ -147,15 +147,18 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((key > '9') and value is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 3 @@ -164,22 +167,22 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key > '9') and (value = value)) (type: boolean) - Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((key > '9') and value is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 41 Data size: 7298 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 41 Data size: 7298 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -192,10 +195,10 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 66 Data size: 11748 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 133 Data size: 23674 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 66 Data size: 11748 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 133 Data size: 23674 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -445,16 +448,19 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) - outputColumnNames: _col0, _col1, _col2 + Filter Operator + predicate: (p_mfgr is not null and p_size is not null) (type: boolean) Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string), _col2 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col2 (type: int) + Select Operator + expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -463,13 +469,13 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_mfgr = p_mfgr) (type: boolean) - Statistics: Num rows: 13 Data size: 1326 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_mfgr is not null (type: boolean) + Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: p_mfgr (type: string), p_size (type: int) sort order: ++ Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 13 Data size: 1326 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: llap LLAP IO: no inputs @@ -478,10 +484,10 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col1 (type: string), _col2 (type: int) - 1 _col1 (type: string), _col0 (type: int) + 1 _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 223 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -501,7 +507,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) outputColumnNames: _col2, _col5 - Statistics: Num rows: 13 Data size: 4810 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 9620 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -522,25 +528,25 @@ STAGE PLANS: window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 13 Data size: 4810 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 9620 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (rank_window_0 <= 2) (type: boolean) - Statistics: Num rows: 4 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 2960 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col5 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 2960 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(_col1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int) Reducer 5 Execution mode: llap @@ -550,16 +556,20 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: int), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: _col1 is not null (type: boolean) + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -591,21 +601,21 @@ Manufacturer#2 almond aquamarine midnight light salmon 2 Manufacturer#3 almond antique misty red olive 1 Manufacturer#4 almond aquamarine yellow dodger mint 7 Manufacturer#5 almond antique sky peru orange 2 -PREHOOK: query: explain -select * -from src b +PREHOOK: query: explain +select * +from src b where b.key in - (select distinct a.key - from src a + (select distinct a.key + from src a where b.value = a.value and a.key > '9' ) PREHOOK: type: QUERY -POSTHOOK: query: explain -select * -from src b +POSTHOOK: query: explain +select * +from src b where b.key in - (select distinct a.key - from src a + (select distinct a.key + from src a where b.value = a.value and a.key > '9' ) POSTHOOK: type: QUERY @@ -618,7 +628,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -626,39 +637,28 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key > '9') and (value = value)) (type: boolean) - Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((key > '9') and value is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 41 Data size: 7298 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 41 Data size: 7298 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -679,6 +679,24 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 41 Data size: 7298 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 41 Data size: 7298 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -686,21 +704,21 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select * -from src b +PREHOOK: query: select * +from src b where b.key in - (select distinct a.key - from src a + (select distinct a.key + from src a where b.value = a.value and a.key > '9' ) PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: select * -from src b +POSTHOOK: query: select * +from src b where b.key in - (select distinct a.key - from src a + (select distinct a.key + from src a where b.value = a.value and a.key > '9' ) POSTHOOK: type: QUERY @@ -717,6 +735,161 @@ POSTHOOK: Input: default@src 97 val_97 98 val_98 98 val_98 +PREHOOK: query: explain +select * +from src b +where b.key in + (select distinct a.key + from src a + where b.value <> a.key and a.key > '9' + ) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * +from src b +where b.key in + (select distinct a.key + from src a + where b.value <> a.key and a.key > '9' + ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key > '9') (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 83 Data size: 7221 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 83 Data size: 7221 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + residual filter predicates: {(_col1 <> _col3)} + Statistics: Num rows: 66 Data size: 17490 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 66 Data size: 11748 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 66 Data size: 11748 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 83 Data size: 7221 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 41 Data size: 7134 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 41 Data size: 7134 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * +from src b +where b.key in + (select distinct a.key + from src a + where b.value <> a.key and a.key > '9' + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * +from src b +where b.key in + (select distinct a.key + from src a + where b.value <> a.key and a.key > '9' + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +90 val_90 +90 val_90 +90 val_90 +92 val_92 +95 val_95 +95 val_95 +96 val_96 +97 val_97 +97 val_97 +98 val_98 +98 val_98 PREHOOK: query: select p_mfgr, p_name, p_size from part where part.p_size in @@ -951,7 +1124,7 @@ STAGE PLANS: alias: lineitem Statistics: Num rows: 100 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: l_partkey is not null (type: boolean) + predicate: (l_partkey is not null and l_quantity is not null) (type: boolean) Statistics: Num rows: 100 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: l_partkey (type: int), l_quantity (type: double), l_extendedprice (type: double) @@ -1028,12 +1201,12 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col1 (type: double), _col3 (type: int) 1 _col0 (type: double), _col1 (type: int) outputColumnNames: _col2 - Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col2) mode: hash @@ -1067,15 +1240,23 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 50 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: double), _col0 (type: int) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: _col1 is not null (type: boolean) Statistics: Num rows: 50 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: double), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: double), _col1 (type: int) + Select Operator + expressions: _col1 (type: double), _col0 (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 50 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: double), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 300 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: double), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: double), _col1 (type: int) + Statistics: Num rows: 25 Data size: 300 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -1107,7 +1288,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 4 <- Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -1117,7 +1298,7 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((p_brand <> 'Brand#14') and (p_size <> 340)) (type: boolean) + predicate: ((p_brand <> 'Brand#14') and (p_size <> 340) and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) @@ -1137,7 +1318,7 @@ STAGE PLANS: alias: p Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: p_type is not null (type: boolean) + predicate: ((p_size <> 340) and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: p_type (type: string), p_size (type: int) @@ -1156,15 +1337,15 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col4 (type: string), _col5 (type: int) - 1 _col1 (type: string), _col0 (type: int) + 1 _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 14 Data size: 8666 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 3714 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 14 Data size: 8666 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 3714 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1177,15 +1358,16 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: int), _col0 (type: string) + Group By Operator + keys: _col0 (type: string), _col1 (type: int) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) + key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) - Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 6 Data size: 648 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -1515,9 +1697,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1525,15 +1706,18 @@ STAGE PLANS: TableScan alias: e Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_name (type: string), p_size (type: int) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: (p_name is not null and p_size is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), (_col1 + 100) (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), (_col1 + 100) (type: int) + Select Operator + expressions: p_name (type: string), p_size (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), (_col1 + 100) (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), (_col1 + 100) (type: int) + Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 @@ -1542,18 +1726,22 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: p_name is not null (type: boolean) + predicate: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: p_name (type: string), p_partkey (type: int) - mode: hash + Select Operator + expressions: p_name (type: string), p_partkey (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 26 Data size: 3250 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1561,10 +1749,10 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col0 (type: string), (_col1 + 100) (type: int) - 1 _col1 (type: string), _col0 (type: int) + 1 _col0 (type: string), _col1 (type: int) Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() @@ -1590,23 +1778,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: int), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) - Statistics: Num rows: 13 Data size: 1625 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -1808,16 +1979,19 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Filter Operator + predicate: (p_name is not null and p_size is not null) (type: boolean) Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string), _col5 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col5 (type: int) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Output Operator + key expressions: _col1 (type: string), _col5 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col5 (type: int) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -1825,16 +1999,19 @@ STAGE PLANS: TableScan alias: p Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_size (type: int) - outputColumnNames: _col0, _col1, _col2 + Filter Operator + predicate: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col2 (type: int), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col2 (type: int), _col0 (type: int) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col2 (type: int), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: int), _col0 (type: int) + Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Map 5 @@ -1842,16 +2019,19 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: p_size (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Filter Operator + predicate: p_size is not null (type: boolean) + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_size (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1940,8 +2120,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1949,16 +2128,19 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Filter Operator + predicate: (p_name is not null and p_partkey is not null and p_size is not null) (type: boolean) Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int) - sort order: +++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col5 (type: int) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col5 (type: int) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -1967,18 +2149,22 @@ STAGE PLANS: alias: p Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((p_partkey = p_partkey) and (p_size = p_size)) (type: boolean) - Statistics: Num rows: 6 Data size: 774 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: p_partkey (type: int), p_name (type: string), p_size (type: int) - mode: hash + predicate: (p_name is not null and p_partkey is not null and p_size is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_size (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 387 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) - sort order: +++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: int) - Statistics: Num rows: 3 Data size: 387 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: int), _col1 (type: string), _col2 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1677 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 13 Data size: 1677 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1986,36 +2172,19 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col0 (type: int), _col1 (type: string), _col5 (type: int) - 1 _col1 (type: int), _col0 (type: string), _col2 (type: int) + 1 _col0 (type: int), _col1 (type: string), _col2 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 3 Data size: 1857 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 8047 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 1857 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 8047 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int), KEY._col1 (type: string), KEY._col2 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 387 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string), _col0 (type: int), _col2 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 387 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: int), _col0 (type: string), _col2 (type: int) - sort order: +++ - Map-reduce partition columns: _col1 (type: int), _col0 (type: string), _col2 (type: int) - Statistics: Num rows: 3 Data size: 387 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -2070,8 +2239,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2079,16 +2247,19 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 8242 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_name (type: string), p_type (type: string), p_brand (type: string) - outputColumnNames: _col0, _col1, _col2 + Filter Operator + predicate: (p_brand is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 8242 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string), _col2 (type: string) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col2 (type: string) + Select Operator + expressions: p_name (type: string), p_type (type: string), p_brand (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 26 Data size: 8242 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 26 Data size: 8242 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -2097,18 +2268,22 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: p_brand is not null (type: boolean) + predicate: (p_brand is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: p_type (type: string), p_brand (type: string) - mode: hash + Select Operator + expressions: p_type (type: string), p_brand (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -2116,7 +2291,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col1 (type: string), _col2 (type: string) 1 _col0 (type: string), _col1 (type: string) @@ -2129,19 +2304,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -2206,16 +2368,19 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_name (type: string), p_type (type: string), (p_size + 1) (type: int) - outputColumnNames: _col0, _col1, _col2 + Filter Operator + predicate: (p_size is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string), _col2 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col2 (type: int) + Select Operator + expressions: p_name (type: string), p_type (type: string), (p_size + 1) (type: int) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -2223,16 +2388,19 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_type (type: string), p_size (type: int) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: p_type is not null (type: boolean) Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: (_col1 + 1) (type: int) - sort order: + - Map-reduce partition columns: (_col1 + 1) (type: int) + Select Operator + expressions: p_type (type: string), p_size (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + Reduce Output Operator + key expressions: (_col1 + 1) (type: int) + sort order: + + Map-reduce partition columns: (_col1 + 1) (type: int) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs Map 5 @@ -2244,16 +2412,19 @@ STAGE PLANS: expressions: (p_size + 1) (type: int) outputColumnNames: _col0 Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -2588,10 +2759,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) - Reducer 5 <- Map 1 (SIMPLE_EDGE) Reducer 7 <- Map 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -2600,48 +2770,62 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: key is not null (type: boolean) + predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: key (type: string), value (type: string) - mode: hash + Select Operator + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 6 + Map 5 Map Operator Tree: TableScan - alias: s1 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + alias: sc + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key = '90') (type: boolean) - Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count() - keys: '90' (type: string) + keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key = '90') (type: boolean) + Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: '90' (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: COMPLETE @@ -2653,10 +2837,10 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col0 (type: string), _col1 (type: string) - 1 _col1 (type: string), _col0 (type: string) + 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0 Statistics: Num rows: 404 Data size: 35148 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -2707,23 +2891,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: string) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: string) - Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE Reducer 7 Execution mode: llap Reduce Operator Tree: @@ -3257,16 +3424,19 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) - outputColumnNames: _col0, _col1, _col2 + Filter Operator + predicate: (p_mfgr is not null and p_name is not null) (type: boolean) Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: string) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: string) + Select Operator + expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: int) + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: string) + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: int) Execution mode: llap LLAP IO: no inputs Map 4 @@ -3275,22 +3445,22 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((p_mfgr = p_mfgr) and (p_size < 10)) (type: boolean) - Statistics: Num rows: 4 Data size: 892 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((p_size < 10) and p_mfgr is not null and p_name is not null) (type: boolean) + Statistics: Num rows: 8 Data size: 1784 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_mfgr (type: string), p_name (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 876 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 1752 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 438 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 876 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 2 Data size: 438 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 876 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -3303,15 +3473,15 @@ STAGE PLANS: 0 _col1 (type: string), _col0 (type: string) 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 446 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 892 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) outputColumnNames: _col1, _col3, _col4 - Statistics: Num rows: 2 Data size: 650 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 1300 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string), _col4 (type: int) sort order: ++ - Statistics: Num rows: 2 Data size: 650 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 1300 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Reducer 3 Execution mode: llap @@ -3319,10 +3489,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), KEY.reducesinkkey1 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 446 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 892 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 446 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 892 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3374,16 +3544,19 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_size (type: int) - outputColumnNames: _col0, _col1, _col2 + Filter Operator + predicate: (p_name is not null and p_size is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string), _col2 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col2 (type: int) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int) + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs Map 3 @@ -3392,7 +3565,7 @@ STAGE PLANS: alias: p Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: p_type is not null (type: boolean) + predicate: (p_name is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_name (type: string), p_type (type: string) @@ -3534,16 +3707,19 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_size (type: int) - outputColumnNames: _col0, _col1, _col2 + Filter Operator + predicate: (p_name is not null and p_size is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string), _col2 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col2 (type: int) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int) + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs Map 3 @@ -3552,7 +3728,7 @@ STAGE PLANS: alias: p Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_size is not null and p_type is not null) (type: boolean) + predicate: (p_name is not null and p_size is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 5954 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_name (type: string), p_type (type: string) @@ -3696,16 +3872,19 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 6058 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_type (type: string), p_size (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 + Filter Operator + predicate: (p_name is not null and p_size is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 6058 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string), _col2 (type: string), _col3 (type: int) - sort order: +++ - Map-reduce partition columns: _col1 (type: string), _col2 (type: string), _col3 (type: int) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_type (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 26 Data size: 6058 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int) + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: string), _col3 (type: int) + sort order: +++ + Map-reduce partition columns: _col1 (type: string), _col2 (type: string), _col3 (type: int) + Statistics: Num rows: 26 Data size: 6058 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs Map 3 @@ -3714,7 +3893,7 @@ STAGE PLANS: alias: p Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: p_type is not null (type: boolean) + predicate: (p_name is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_name (type: string), p_type (type: string) @@ -3822,16 +4001,19 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_size (type: int) - outputColumnNames: _col0, _col1, _col2 + Filter Operator + predicate: (p_name is not null and p_size is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string), _col2 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col2 (type: int) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int) + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs Map 3 @@ -3840,7 +4022,7 @@ STAGE PLANS: alias: p Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: p_type is not null (type: boolean) + predicate: (p_name is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_name (type: string), p_type (type: string) @@ -3984,16 +4166,19 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Filter Operator + predicate: (p_size is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col4 (type: string) - sort order: + - Map-reduce partition columns: _col4 (type: string) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Output Operator + key expressions: _col4 (type: string) + sort order: + + Map-reduce partition columns: _col4 (type: string) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs Map 4 @@ -4057,15 +4242,15 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col4 (type: string), UDFToLong(_col5) (type: bigint) - 1 _col1 (type: string), _col0 (type: bigint) + 1 _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 4 Data size: 2476 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1238 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 2476 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1238 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4099,15 +4284,19 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: bigint), _col0 (type: string) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: _col1 is not null (type: boolean) Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: bigint) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: bigint) - Statistics: Num rows: 13 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col1 (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: bigint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) + Statistics: Num rows: 6 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -4148,16 +4337,19 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Filter Operator + predicate: (p_partkey is not null and p_size is not null) (type: boolean) Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), UDFToDouble(_col5) (type: double) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), UDFToDouble(_col5) (type: double) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Output Operator + key expressions: _col0 (type: int), UDFToDouble(_col5) (type: double) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), UDFToDouble(_col5) (type: double) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -4187,15 +4379,15 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col0 (type: int), UDFToDouble(_col5) (type: double) - 1 _col1 (type: int), _col0 (type: double) + 1 _col0 (type: int), _col1 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 13 Data size: 8047 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 3714 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 13 Data size: 8047 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 3714 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4209,15 +4401,19 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: double), _col0 (type: int) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: _col1 is not null (type: boolean) Statistics: Num rows: 13 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: int), _col0 (type: double) - sort order: ++ - Map-reduce partition columns: _col1 (type: int), _col0 (type: double) - Statistics: Num rows: 13 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: int), _col1 (type: double) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: double) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: double) + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -4259,7 +4455,7 @@ POSTHOOK: Input: default@part 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -Warning: Shuffle Join MERGEJOIN[29][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[34][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product PREHOOK: query: explain select * from part where p_size in (select min(pp.p_size) from part pp where pp.p_partkey > part.p_partkey) PREHOOK: type: QUERY POSTHOOK: query: explain select * from part where p_size in (select min(pp.p_size) from part pp where pp.p_partkey > part.p_partkey) @@ -4284,16 +4480,19 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Filter Operator + predicate: (p_partkey is not null and p_size is not null) (type: boolean) Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col5 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col5 (type: int) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Output Operator + key expressions: _col0 (type: int), _col5 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col5 (type: int) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -4316,16 +4515,19 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: p_partkey (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_partkey (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -4333,15 +4535,15 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col0 (type: int), _col5 (type: int) - 1 _col1 (type: int), _col0 (type: int) + 1 _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 9 Data size: 5571 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 2476 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 9 Data size: 5571 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 2476 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4383,15 +4585,19 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: int), _col0 (type: int) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: _col1 is not null (type: boolean) Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: int), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: int), _col0 (type: int) - Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reducer 7 Execution mode: llap Reduce Operator Tree: @@ -4411,7 +4617,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[29][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[34][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product PREHOOK: query: select * from part where p_size in (select min(pp.p_size) from part pp where pp.p_partkey > part.p_partkey) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -5170,16 +5376,19 @@ STAGE PLANS: TableScan alias: t Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i (type: int), j (type: int) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: (i is not null and j is not null) (type: boolean) Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToLong(_col0) (type: bigint), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: UDFToLong(_col0) (type: bigint), _col1 (type: int) + Select Operator + expressions: i (type: int), j (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) + Reduce Output Operator + key expressions: UDFToLong(_col0) (type: bigint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: UDFToLong(_col0) (type: bigint), _col1 (type: int) + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs Map 3 @@ -5209,7 +5418,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 UDFToLong(_col0) (type: bigint), _col1 (type: int) 1 _col0 (type: bigint), _col1 (type: int) @@ -5231,15 +5440,23 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: bigint), _col0 (type: int) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: _col1 is not null (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: bigint), _col1 (type: int) + Select Operator + expressions: _col1 (type: bigint), _col0 (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: bigint), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: bigint), _col1 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -5273,3 +5490,388 @@ POSTHOOK: query: drop table tt POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@tt POSTHOOK: Output: default@tt +Warning: Shuffle Join MERGEJOIN[34][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product +PREHOOK: query: explain select * from part where p_size IN (select max(p_size) from part p where p.p_type <> part.p_name) +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from part where p_size IN (select max(p_size) from part p where p.p_type <> part.p_name) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (XPROD_EDGE), Reducer 7 (XPROD_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (p_name is not null and p_size is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string), _col5 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col5 (type: int) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: p + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_type (type: string), p_size (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: int) + Execution mode: llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_name (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col1 (type: string), _col5 (type: int) + 1 _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 12 Data size: 7428 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 7428 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + residual filter predicates: {(_col0 <> _col2)} + Statistics: Num rows: 338 Data size: 77402 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int), _col2 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 338 Data size: 77402 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: max(_col1) + keys: _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 3125 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 3125 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 3125 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: _col1 is not null (type: boolean) + Statistics: Num rows: 25 Data size: 3125 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 12 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join MERGEJOIN[34][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product +PREHOOK: query: select * from part where p_size IN (select max(p_size) from part p where p.p_type <> part.p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select * from part where p_size IN (select max(p_size) from part p where p.p_type <> part.p_name) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu +Warning: Shuffle Join MERGEJOIN[41][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product +PREHOOK: query: explain select * from part where p_size IN (select pp.p_size from part p join part pp on pp.p_type = p.p_type where part.p_type <> p.p_name) +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from part where p_size IN (select pp.p_size from part p join part pp on pp.p_type = p.p_type where part.p_type <> p.p_name) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (XPROD_EDGE), Reducer 7 (XPROD_EDGE) + Reducer 5 <- Map 8 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (p_size is not null and p_type is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col4 (type: string), _col5 (type: int) + sort order: ++ + Map-reduce partition columns: _col4 (type: string), _col5 (type: int) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: p + Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_name (type: string), p_type (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 2704 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: p_type (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 8 + Map Operator Tree: + TableScan + alias: pp + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (p_size is not null and p_type is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_type (type: string), p_size (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col4 (type: string), _col5 (type: int) + 1 _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + residual filter predicates: {(_col2 <> _col0)} + Statistics: Num rows: 338 Data size: 111202 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 338 Data size: 111202 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: string) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col4 + Statistics: Num rows: 366 Data size: 39528 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col4 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 366 Data size: 39528 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 183 Data size: 19764 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 183 Data size: 19764 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join MERGEJOIN[41][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product +PREHOOK: query: select * from part where p_size IN (select pp.p_size from part p join part pp on pp.p_type = p.p_type where part.p_type <> p.p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select * from part where p_size IN (select pp.p_size from part p join part pp on pp.p_type = p.p_type where part.p_type <> p.p_name) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ +110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously +112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even +144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about +146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref +15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu +155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra +17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the +17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve +191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle +192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir +195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de +33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful +40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s +42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl +45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful +48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i +49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick +65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr +78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith +85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull +86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully +90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl diff --git a/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out b/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out new file mode 100644 index 0000000000..0ffbaaea34 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out @@ -0,0 +1,2079 @@ +PREHOOK: query: DROP TABLE IF EXISTS part_subq +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS part_subq +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE part_subq( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part_subq +POSTHOOK: query: CREATE TABLE part_subq( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part_subq +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part_subq +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@part_subq +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part_subq +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@part_subq +PREHOOK: query: explain + select key, count(*) +from src +group by key +having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key ) +PREHOOK: type: QUERY +POSTHOOK: query: explain + select key, count(*) +from src +group by key +having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Filter Operator + predicate: (key > '9') (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: _col1 is not null (type: boolean) + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Map-reduce partition columns: _col1 (type: bigint) + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col1 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 41 Data size: 3895 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 41 Data size: 3895 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col1 + Statistics: Num rows: 83 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: _col1 is not null (type: boolean) + Statistics: Num rows: 83 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 83 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 41 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 41 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select s1.key, count(*) from src s1 where s1.key > '9' group by s1.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select s1.key, count(*) from src s1 where s1.key > '9' group by s1.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +90 3 +92 1 +95 2 +96 1 +97 2 +98 2 +PREHOOK: query: select key, count(*) +from src +group by key +having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) +from src +group by key +having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 3 +119 3 +128 3 +167 3 +187 3 +193 3 +199 3 +208 3 +273 3 +298 3 +311 3 +316 3 +318 3 +327 3 +35 3 +369 3 +384 3 +396 3 +403 3 +409 3 +417 3 +430 3 +431 3 +438 3 +454 3 +466 3 +480 3 +498 3 +5 3 +70 3 +90 3 +PREHOOK: query: explain +select p_mfgr, avg(p_size) +from part_subq b +group by b.p_mfgr +having b.p_mfgr in + (select p_mfgr + from part_subq + group by p_mfgr + having max(p_size) - min(p_size) < 20 + ) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select p_mfgr, avg(p_size) +from part_subq b +group by b.p_mfgr +having b.p_mfgr in + (select p_mfgr + from part_subq + group by p_mfgr + having max(p_size) - min(p_size) < 20 + ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_mfgr is not null (type: boolean) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(p_size) + keys: p_mfgr (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct) + Group By Operator + aggregations: max(p_size), min(p_size) + keys: p_mfgr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0), min(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col1 - _col2) < 20) (type: boolean) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select p_mfgr, avg(p_size) +from part_subq b +group by b.p_mfgr +having b.p_mfgr in + (select p_mfgr + from part_subq + group by p_mfgr + having max(p_size) - min(p_size) < 20 + ) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select p_mfgr, avg(p_size) +from part_subq b +group by b.p_mfgr +having b.p_mfgr in + (select p_mfgr + from part_subq + group by p_mfgr + having max(p_size) - min(p_size) < 20 + ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_mfgr is not null (type: boolean) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(p_size) + keys: p_mfgr (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct) + Group By Operator + aggregations: max(p_size), min(p_size) + keys: p_mfgr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0), min(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col1 - _col2) < 20) (type: boolean) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select b.key, min(b.value) +from src b +group by b.key +having b.key in ( select a.key + from src a + where a.value > 'val_9' and a.value = min(b.value) + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select b.key, min(b.value) +from src b +group by b.key +having b.key in ( select a.key + from src a + where a.value > 'val_9' and a.value = min(b.value) + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +90 val_90 +92 val_92 +95 val_95 +96 val_96 +97 val_97 +98 val_98 +PREHOOK: query: explain +select key, value, count(*) +from src b +where b.key in (select key from src where src.key > '8') +group by key, value +having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key ) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, count(*) +from src b +where b.key in (select key from src where src.key > '8') +group by key, value +having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key > '8') (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key > '8') (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 83 Data size: 7221 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 83 Data size: 7221 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key > '9') (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 133 Data size: 23674 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 66 Data size: 12276 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 66 Data size: 12276 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 66 Data size: 12276 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: _col2 is not null (type: boolean) + Statistics: Num rows: 66 Data size: 12276 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: bigint) + sort order: + + Map-reduce partition columns: _col2 (type: bigint) + Statistics: Num rows: 66 Data size: 12276 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col2 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 32 Data size: 5952 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 32 Data size: 5952 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col1 + Statistics: Num rows: 83 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: _col1 is not null (type: boolean) + Statistics: Num rows: 83 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 83 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 41 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 41 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, count(*) +from src b +where b.key in (select key from src where src.key > '8') +group by key, value +having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key ) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, value, count(*) +from src b +where b.key in (select key from src where src.key > '8') +group by key, value +having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +80 val_80 1 +82 val_82 1 +83 val_83 2 +84 val_84 2 +85 val_85 1 +86 val_86 1 +87 val_87 1 +9 val_9 1 +90 val_90 3 +92 val_92 1 +95 val_95 2 +96 val_96 1 +97 val_97 2 +98 val_98 2 +PREHOOK: query: explain +select key, value, count(*) +from src b +where b.key in (select key from src where src.key > '8') +group by key, value +having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key ) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, count(*) +from src b +where b.key in (select key from src where src.key > '8') +group by key, value +having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (BROADCAST_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key > '8') (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 133 Data size: 23674 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 66 Data size: 12276 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 66 Data size: 12276 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key > '8') (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 83 Data size: 7221 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 83 Data size: 7221 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key > '9') (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 66 Data size: 12276 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: _col2 is not null (type: boolean) + Statistics: Num rows: 66 Data size: 12276 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col2 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Reducer 4 + Statistics: Num rows: 32 Data size: 5952 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 32 Data size: 5952 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col1 + Statistics: Num rows: 83 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: _col1 is not null (type: boolean) + Statistics: Num rows: 83 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 83 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 41 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 41 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select key, value, count(*) +from src b +where b.key in (select key from src where src.value = b.value) +group by key, value +having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key ) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, count(*) +from src b +where b.key in (select key from src where src.value = b.value) +group by key, value +having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (BROADCAST_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 404 Data size: 71912 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 202 Data size: 37572 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 202 Data size: 37572 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key > '9') (type: boolean) + Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 202 Data size: 37572 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: _col2 is not null (type: boolean) + Statistics: Num rows: 202 Data size: 37572 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col2 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Reducer 5 + Statistics: Num rows: 41 Data size: 7626 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 41 Data size: 7626 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col1 + Statistics: Num rows: 83 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: _col1 is not null (type: boolean) + Statistics: Num rows: 83 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 83 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 41 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 41 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, count(*) +from src b +where b.key in (select key from src where src.value = b.value) +group by key, value +having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key ) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, value, count(*) +from src b +where b.key in (select key from src where src.value = b.value) +group by key, value +having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 3 +10 val_10 1 +100 val_100 2 +103 val_103 2 +104 val_104 2 +105 val_105 1 +11 val_11 1 +111 val_111 1 +113 val_113 2 +114 val_114 1 +116 val_116 1 +118 val_118 2 +119 val_119 3 +12 val_12 2 +120 val_120 2 +125 val_125 2 +126 val_126 1 +128 val_128 3 +129 val_129 2 +131 val_131 1 +133 val_133 1 +134 val_134 2 +136 val_136 1 +137 val_137 2 +143 val_143 1 +145 val_145 1 +146 val_146 2 +149 val_149 2 +15 val_15 2 +150 val_150 1 +152 val_152 2 +153 val_153 1 +155 val_155 1 +156 val_156 1 +157 val_157 1 +158 val_158 1 +160 val_160 1 +162 val_162 1 +163 val_163 1 +164 val_164 2 +165 val_165 2 +166 val_166 1 +167 val_167 3 +168 val_168 1 +17 val_17 1 +170 val_170 1 +172 val_172 2 +174 val_174 2 +175 val_175 2 +176 val_176 2 +177 val_177 1 +178 val_178 1 +179 val_179 2 +18 val_18 2 +180 val_180 1 +181 val_181 1 +183 val_183 1 +186 val_186 1 +187 val_187 3 +189 val_189 1 +19 val_19 1 +190 val_190 1 +191 val_191 2 +192 val_192 1 +193 val_193 3 +194 val_194 1 +195 val_195 2 +196 val_196 1 +197 val_197 2 +199 val_199 3 +2 val_2 1 +20 val_20 1 +200 val_200 2 +201 val_201 1 +202 val_202 1 +203 val_203 2 +205 val_205 2 +207 val_207 2 +208 val_208 3 +209 val_209 2 +213 val_213 2 +214 val_214 1 +216 val_216 2 +217 val_217 2 +218 val_218 1 +219 val_219 2 +221 val_221 2 +222 val_222 1 +223 val_223 2 +224 val_224 2 +226 val_226 1 +228 val_228 1 +229 val_229 2 +233 val_233 2 +235 val_235 1 +237 val_237 2 +238 val_238 2 +239 val_239 2 +24 val_24 2 +241 val_241 1 +242 val_242 2 +244 val_244 1 +247 val_247 1 +248 val_248 1 +249 val_249 1 +252 val_252 1 +255 val_255 2 +256 val_256 2 +257 val_257 1 +258 val_258 1 +26 val_26 2 +260 val_260 1 +262 val_262 1 +263 val_263 1 +265 val_265 2 +266 val_266 1 +27 val_27 1 +272 val_272 2 +273 val_273 3 +274 val_274 1 +275 val_275 1 +278 val_278 2 +28 val_28 1 +280 val_280 2 +281 val_281 2 +282 val_282 2 +283 val_283 1 +284 val_284 1 +285 val_285 1 +286 val_286 1 +287 val_287 1 +288 val_288 2 +289 val_289 1 +291 val_291 1 +292 val_292 1 +296 val_296 1 +298 val_298 3 +30 val_30 1 +302 val_302 1 +305 val_305 1 +306 val_306 1 +307 val_307 2 +308 val_308 1 +309 val_309 2 +310 val_310 1 +311 val_311 3 +315 val_315 1 +316 val_316 3 +317 val_317 2 +318 val_318 3 +321 val_321 2 +322 val_322 2 +323 val_323 1 +325 val_325 2 +327 val_327 3 +33 val_33 1 +331 val_331 2 +332 val_332 1 +333 val_333 2 +335 val_335 1 +336 val_336 1 +338 val_338 1 +339 val_339 1 +34 val_34 1 +341 val_341 1 +342 val_342 2 +344 val_344 2 +345 val_345 1 +35 val_35 3 +351 val_351 1 +353 val_353 2 +356 val_356 1 +360 val_360 1 +362 val_362 1 +364 val_364 1 +365 val_365 1 +366 val_366 1 +367 val_367 2 +368 val_368 1 +369 val_369 3 +37 val_37 2 +373 val_373 1 +374 val_374 1 +375 val_375 1 +377 val_377 1 +378 val_378 1 +379 val_379 1 +382 val_382 2 +384 val_384 3 +386 val_386 1 +389 val_389 1 +392 val_392 1 +393 val_393 1 +394 val_394 1 +395 val_395 2 +396 val_396 3 +397 val_397 2 +399 val_399 2 +4 val_4 1 +400 val_400 1 +402 val_402 1 +403 val_403 3 +404 val_404 2 +407 val_407 1 +409 val_409 3 +41 val_41 1 +411 val_411 1 +413 val_413 2 +414 val_414 2 +417 val_417 3 +418 val_418 1 +419 val_419 1 +42 val_42 2 +421 val_421 1 +424 val_424 2 +427 val_427 1 +429 val_429 2 +43 val_43 1 +430 val_430 3 +431 val_431 3 +432 val_432 1 +435 val_435 1 +436 val_436 1 +437 val_437 1 +438 val_438 3 +439 val_439 2 +44 val_44 1 +443 val_443 1 +444 val_444 1 +446 val_446 1 +448 val_448 1 +449 val_449 1 +452 val_452 1 +453 val_453 1 +454 val_454 3 +455 val_455 1 +457 val_457 1 +458 val_458 2 +459 val_459 2 +460 val_460 1 +462 val_462 2 +463 val_463 2 +466 val_466 3 +467 val_467 1 +47 val_47 1 +470 val_470 1 +472 val_472 1 +475 val_475 1 +477 val_477 1 +478 val_478 2 +479 val_479 1 +480 val_480 3 +481 val_481 1 +482 val_482 1 +483 val_483 1 +484 val_484 1 +485 val_485 1 +487 val_487 1 +490 val_490 1 +491 val_491 1 +492 val_492 2 +493 val_493 1 +494 val_494 1 +495 val_495 1 +496 val_496 1 +497 val_497 1 +498 val_498 3 +5 val_5 3 +51 val_51 2 +53 val_53 1 +54 val_54 1 +57 val_57 1 +58 val_58 2 +64 val_64 1 +65 val_65 1 +66 val_66 1 +67 val_67 2 +69 val_69 1 +70 val_70 3 +72 val_72 2 +74 val_74 1 +76 val_76 2 +77 val_77 1 +78 val_78 1 +8 val_8 1 +80 val_80 1 +82 val_82 1 +83 val_83 2 +84 val_84 2 +85 val_85 1 +86 val_86 1 +87 val_87 1 +9 val_9 1 +90 val_90 3 +92 val_92 1 +95 val_95 2 +96 val_96 1 +97 val_97 2 +98 val_98 2 +PREHOOK: query: explain +select p_mfgr, p_name, avg(p_size) +from part_subq +group by p_mfgr, p_name +having p_name in + (select first_value(p_name) over(partition by p_mfgr order by p_size) from part_subq) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select p_mfgr, p_name, avg(p_size) +from part_subq +group by p_mfgr, p_name +having p_name in + (select first_value(p_name) over(partition by p_mfgr order by p_size) from part_subq) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (BROADCAST_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part_subq + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(p_size) + keys: p_name (type: string), p_mfgr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: part_subq + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: p_mfgr (type: string), p_size (type: int) + sort order: ++ + Map-reduce partition columns: p_mfgr (type: string) + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col0 (type: string), _col2 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Reducer 4 + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 409 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col1: string, _col2: string, _col5: int + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col5 ASC NULLS FIRST + partition by: _col2 + raw input shape: + window functions: + window function definition + alias: first_value_window_0 + arguments: _col1 + name: first_value + window function: GenericUDAFFirstValueEvaluator + window frame: RANGE PRECEDING(MAX)~CURRENT + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: first_value_window_0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: first_value_window_0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: CREATE TABLE src_null (key STRING COMMENT 'default', value STRING COMMENT 'default') STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_null +POSTHOOK: query: CREATE TABLE src_null (key STRING COMMENT 'default', value STRING COMMENT 'default') STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_null +PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/kv1.txt" INTO TABLE src_null +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@src_null +POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/kv1.txt" INTO TABLE src_null +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@src_null +PREHOOK: query: INSERT INTO src_null values('5444', null) +PREHOOK: type: QUERY +PREHOOK: Output: default@src_null +POSTHOOK: query: INSERT INTO src_null values('5444', null) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@src_null +POSTHOOK: Lineage: src_null.key SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: src_null.value SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +Warning: Map Join MAPJOIN[123][bigTable=?] in task 'Map 1' is a cross product +Warning: Map Join MAPJOIN[124][bigTable=?] in task 'Map 1' is a cross product +Warning: Map Join MAPJOIN[126][bigTable=?] in task 'Map 1' is a cross product +Warning: Shuffle Join MERGEJOIN[125][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[127][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 6' is a cross product +PREHOOK: query: explain +select key, value, count(*) +from src_null b +where NOT EXISTS (select key from src_null where src_null.value <> b.value) +group by key, value +having count(*) not in (select count(*) from src_null s1 where s1.key > '9' and s1.value <> b.value group by s1.key ) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, count(*) +from src_null b +where NOT EXISTS (select key from src_null where src_null.value <> b.value) +group by key, value +having count(*) not in (select count(*) from src_null s1 where s1.key > '9' and s1.value <> b.value group by s1.key ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Reducer 11 (BROADCAST_EDGE), Reducer 12 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE) + Reducer 11 <- Map 10 (SIMPLE_EDGE) + Reducer 12 <- Map 10 (SIMPLE_EDGE) + Reducer 13 <- Map 10 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) + Reducer 3 <- Map 1 (XPROD_EDGE), Reducer 5 (XPROD_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Map 1 (SIMPLE_EDGE) + Reducer 6 <- Map 1 (XPROD_EDGE), Reducer 9 (XPROD_EDGE) + Reducer 7 <- Reducer 6 (SIMPLE_EDGE) + Reducer 8 <- Reducer 7 (SIMPLE_EDGE) + Reducer 9 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Reducer 11 + residual filter predicates: {(_col1 <> _col2)} + Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col3 is null (type: boolean) + Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col1 (type: string), _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Filter Operator + predicate: (key > '9') (type: boolean) + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Reducer 12 + residual filter predicates: {(_col1 <> _col2)} + Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col3 is null (type: boolean) + Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: string), _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > '9') (type: boolean) + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Reducer 13 + residual filter predicates: {(_col1 <> _col2)} + Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col3 is null (type: boolean) + Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: string), _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 10 + Map Operator Tree: + TableScan + alias: src_null + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: value (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 11 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), true (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: boolean) + Reducer 12 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), true (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: boolean) + Reducer 13 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), true (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: boolean) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col0 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col4, _col5 + input vertices: + 1 Reducer 4 + Statistics: Num rows: 1 Data size: 608 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: string), _col2 (type: bigint) + 1 _col1 (type: string), _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col8 + input vertices: + 1 Reducer 8 + Statistics: Num rows: 1 Data size: 668 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (not CASE WHEN ((_col4 = 0)) THEN (false) WHEN (_col4 is null) THEN (false) WHEN (_col8 is not null) THEN (true) WHEN (_col2 is null) THEN (null) WHEN ((_col5 < _col4)) THEN (true) ELSE (false) END) (type: boolean) + Statistics: Num rows: 1 Data size: 668 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 668 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 668 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + residual filter predicates: {(_col1 <> _col2)} + Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col2 (type: string), _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: bigint) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(), count(_col2) + keys: _col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: string) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + residual filter predicates: {(_col1 <> _col2)} + Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col2 (type: string), _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: bigint) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: string), _col2 (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: bigint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) + Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE + Reducer 8 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: bigint), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), true (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: bigint) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 922 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: boolean) + Reducer 9 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: string) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 553 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[123][bigTable=?] in task 'Map 1' is a cross product +Warning: Map Join MAPJOIN[124][bigTable=?] in task 'Map 1' is a cross product +Warning: Map Join MAPJOIN[126][bigTable=?] in task 'Map 1' is a cross product +Warning: Shuffle Join MERGEJOIN[125][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[127][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 6' is a cross product +PREHOOK: query: select key, value, count(*) +from src_null b +where NOT EXISTS (select key from src_null where src_null.value <> b.value) +group by key, value +having count(*) not in (select count(*) from src_null s1 where s1.key > '9' and s1.value <> b.value group by s1.key ) +PREHOOK: type: QUERY +PREHOOK: Input: default@src_null +#### A masked pattern was here #### +POSTHOOK: query: select key, value, count(*) +from src_null b +where NOT EXISTS (select key from src_null where src_null.value <> b.value) +group by key, value +having count(*) not in (select count(*) from src_null s1 where s1.key > '9' and s1.value <> b.value group by s1.key ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_null +#### A masked pattern was here #### +5444 NULL 1 +PREHOOK: query: DROP TABLE src_null +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@src_null +PREHOOK: Output: default@src_null +POSTHOOK: query: DROP TABLE src_null +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@src_null +POSTHOOK: Output: default@src_null +PREHOOK: query: DROP TABLE part_subq +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@part_subq +PREHOOK: Output: default@part_subq +POSTHOOK: query: DROP TABLE part_subq +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@part_subq +POSTHOOK: Output: default@part_subq diff --git a/ql/src/test/results/clientpositive/llap/subquery_multi.q.out b/ql/src/test/results/clientpositive/llap/subquery_multi.q.out index 96fe17a05a..d1579033ac 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_multi.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_multi.q.out @@ -1663,14 +1663,13 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 10 <- Map 12 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) - Reducer 11 <- Reducer 10 (SIMPLE_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (ONE_TO_ONE_EDGE) - Reducer 4 <- Reducer 11 (ONE_TO_ONE_EDGE), Reducer 3 (SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE) - Reducer 8 <- Map 12 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) - Reducer 9 <- Reducer 8 (SIMPLE_EDGE) + Reducer 10 <- Reducer 9 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 10 (ONE_TO_ONE_EDGE), Reducer 3 (SIMPLE_EDGE) + Reducer 7 <- Map 11 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 8 <- Reducer 7 (SIMPLE_EDGE) + Reducer 9 <- Map 11 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1678,25 +1677,28 @@ STAGE PLANS: TableScan alias: part_null Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Filter Operator + predicate: (p_name is not null and p_type is not null) (type: boolean) Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col4 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col4 (type: string), _col1 (type: string) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Output Operator + key expressions: _col4 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col4 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs - Map 12 + Map 11 Map Operator Tree: TableScan alias: pp Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: p_type is not null (type: boolean) + predicate: (p_brand is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_type (type: string), p_brand (type: string) @@ -1725,27 +1727,31 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: p_type is not null (type: boolean) + predicate: (p_name is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: p_type (type: string), p_name (type: string) - mode: hash + Select Operator + expressions: p_type (type: string), p_name (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 2925 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 2925 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 13 Data size: 2925 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 7 + Map 6 Map Operator Tree: TableScan alias: part Statistics: Num rows: 26 Data size: 7488 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: p_type is not null (type: boolean) + predicate: (p_brand is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 7488 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_brand (type: string), p_type (type: string), p_container (type: string) @@ -1768,27 +1774,6 @@ STAGE PLANS: Reducer 10 Execution mode: llap Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col1 (type: string), _col0 (type: string) - 1 _col0 (type: string), _col1 (type: string) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 14 Data size: 2744 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col2 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 1372 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 7 Data size: 1372 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 11 - Execution mode: llap - Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial @@ -1812,10 +1797,10 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col4 (type: string), _col1 (type: string) - 1 _col1 (type: string), _col0 (type: string) + 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 14 Data size: 3217 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -1833,14 +1818,14 @@ STAGE PLANS: keys: 0 _col4 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11 Statistics: Num rows: 15 Data size: 3538 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col3 (type: string), _col4 (type: string) sort order: ++ Map-reduce partition columns: _col3 (type: string), _col4 (type: string) Statistics: Num rows: 15 Data size: 3538 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: bigint), _col13 (type: bigint) + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col10 (type: bigint), _col11 (type: bigint) Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -1850,10 +1835,10 @@ STAGE PLANS: keys: 0 _col3 (type: string), _col4 (type: string) 1 _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col16 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11, _col14 Statistics: Num rows: 16 Data size: 3891 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (not CASE WHEN ((_col12 = 0)) THEN (false) WHEN (_col12 is null) THEN (false) WHEN (_col16 is not null) THEN (true) WHEN (_col3 is null) THEN (null) WHEN ((_col13 < _col12)) THEN (true) ELSE (false) END) (type: boolean) + predicate: (not CASE WHEN ((_col10 = 0)) THEN (false) WHEN (_col10 is null) THEN (false) WHEN (_col14 is not null) THEN (true) WHEN (_col3 is null) THEN (null) WHEN ((_col11 < _col10)) THEN (true) ELSE (false) END) (type: boolean) Statistics: Num rows: 8 Data size: 1945 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) @@ -1866,24 +1851,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 2925 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 2925 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: string) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: string) - Statistics: Num rows: 13 Data size: 2925 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 8 + Reducer 7 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -1906,7 +1874,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 7 Data size: 840 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) - Reducer 9 + Reducer 8 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -1921,6 +1889,27 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 7 Data size: 840 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 9 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col1 (type: string), _col0 (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 14 Data size: 2744 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col2 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 1372 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 7 Data size: 1372 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -1992,7 +1981,7 @@ STAGE PLANS: alias: part_null Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) + predicate: (p_brand is not null and p_name is not null and p_type is not null) (type: boolean) Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) @@ -2032,7 +2021,7 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: p_type is not null (type: boolean) + predicate: (p_brand is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_brand (type: string), p_type (type: string) @@ -2154,14 +2143,13 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 10 <- Map 12 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) - Reducer 11 <- Reducer 10 (SIMPLE_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (ONE_TO_ONE_EDGE) - Reducer 4 <- Reducer 11 (ONE_TO_ONE_EDGE), Reducer 3 (SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE) - Reducer 8 <- Map 12 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) - Reducer 9 <- Reducer 8 (SIMPLE_EDGE) + Reducer 10 <- Reducer 9 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 10 (ONE_TO_ONE_EDGE), Reducer 3 (SIMPLE_EDGE) + Reducer 7 <- Map 11 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 8 <- Reducer 7 (SIMPLE_EDGE) + Reducer 9 <- Map 11 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2169,25 +2157,28 @@ STAGE PLANS: TableScan alias: part_null Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Filter Operator + predicate: (p_container is not null and p_name is not null and p_type is not null) (type: boolean) Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col4 (type: string), _col1 (type: string), _col6 (type: string) - sort order: +++ - Map-reduce partition columns: _col4 (type: string), _col1 (type: string), _col6 (type: string) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col7 (type: double), _col8 (type: string) + Reduce Output Operator + key expressions: _col4 (type: string), _col1 (type: string), _col6 (type: string) + sort order: +++ + Map-reduce partition columns: _col4 (type: string), _col1 (type: string), _col6 (type: string) + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs - Map 12 + Map 11 Map Operator Tree: TableScan alias: pp Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: p_type is not null (type: boolean) + predicate: (p_brand is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_type (type: string), p_brand (type: string) @@ -2216,27 +2207,31 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 8242 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((p_container = p_container) and (p_type = p_type)) (type: boolean) - Statistics: Num rows: 6 Data size: 1902 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: p_type (type: string), p_name (type: string), p_container (type: string) - mode: hash + predicate: (p_container is not null and p_name is not null and p_type is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 8242 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_type (type: string), p_name (type: string), p_container (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 951 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Statistics: Num rows: 3 Data size: 951 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 8242 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col1 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 4121 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 13 Data size: 4121 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 7 + Map 6 Map Operator Tree: TableScan alias: part Statistics: Num rows: 26 Data size: 7488 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: p_type is not null (type: boolean) + predicate: (p_brand is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 7488 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_brand (type: string), p_type (type: string), p_container (type: string) @@ -2259,65 +2254,40 @@ STAGE PLANS: Reducer 10 Execution mode: llap Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col1 (type: string), _col0 (type: string) - 1 _col0 (type: string), _col1 (type: string) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 14 Data size: 2744 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col1 (type: string), _col2 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 1372 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 7 Data size: 1372 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 11 - Execution mode: llap - Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 7 Data size: 1372 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string), _col0 (type: string) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: _col0 is not null (type: boolean) Statistics: Num rows: 7 Data size: 1372 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 7 Data size: 1372 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), true (type: boolean) - outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), true (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 7 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 7 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: string) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: string) - Statistics: Num rows: 7 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: boolean) + value expressions: _col2 (type: boolean) Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col4 (type: string), _col1 (type: string), _col6 (type: string) - 1 _col1 (type: string), _col0 (type: string), _col2 (type: string) + 1 _col0 (type: string), _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 3 Data size: 1046 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 4533 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col4 (type: string) sort order: + Map-reduce partition columns: _col4 (type: string) - Statistics: Num rows: 3 Data size: 1046 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 4533 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reducer 3 Execution mode: llap @@ -2328,14 +2298,14 @@ STAGE PLANS: keys: 0 _col4 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14 - Statistics: Num rows: 7 Data size: 924 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11 + Statistics: Num rows: 15 Data size: 4986 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col4 (type: string), _col3 (type: string) + key expressions: _col3 (type: string), _col4 (type: string) sort order: ++ - Map-reduce partition columns: _col4 (type: string), _col3 (type: string) - Statistics: Num rows: 7 Data size: 924 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col13 (type: bigint), _col14 (type: bigint) + Map-reduce partition columns: _col3 (type: string), _col4 (type: string) + Statistics: Num rows: 15 Data size: 4986 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col10 (type: bigint), _col11 (type: bigint) Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -2343,42 +2313,25 @@ STAGE PLANS: condition map: Left Outer Join 0 to 1 keys: - 0 _col4 (type: string), _col3 (type: string) - 1 _col1 (type: string), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col17 - Statistics: Num rows: 7 Data size: 1016 Basic stats: COMPLETE Column stats: NONE + 0 _col3 (type: string), _col4 (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11, _col14 + Statistics: Num rows: 16 Data size: 5484 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (not CASE WHEN ((_col13 = 0)) THEN (false) WHEN (_col13 is null) THEN (false) WHEN (_col17 is not null) THEN (true) WHEN (_col3 is null) THEN (null) WHEN ((_col14 < _col13)) THEN (true) ELSE (false) END) (type: boolean) - Statistics: Num rows: 4 Data size: 580 Basic stats: COMPLETE Column stats: NONE + predicate: (not CASE WHEN ((_col10 = 0)) THEN (false) WHEN (_col10 is null) THEN (false) WHEN (_col14 is not null) THEN (true) WHEN (_col3 is null) THEN (null) WHEN ((_col11 < _col10)) THEN (true) ELSE (false) END) (type: boolean) + Statistics: Num rows: 8 Data size: 2742 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 4 Data size: 580 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 2742 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 580 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 2742 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 951 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string), _col0 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 951 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: string), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: string), _col2 (type: string) - Statistics: Num rows: 3 Data size: 951 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 8 + Reducer 7 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -2401,7 +2354,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 7 Data size: 840 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) - Reducer 9 + Reducer 8 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -2416,6 +2369,27 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 7 Data size: 840 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 9 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col1 (type: string), _col0 (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 14 Data size: 2744 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col2 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 1372 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 7 Data size: 1372 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -2433,32 +2407,32 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@part POSTHOOK: Input: default@part_null #### A masked pattern was here #### -17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve -15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu -40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s -191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle -78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr +86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously +191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle 105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ -112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car 146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref +132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even +195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir +40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s +112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car +144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about 17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the -110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously -121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h -121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h -48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i +17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve 49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick -155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra -45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful -132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even -86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful +48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i +45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful 42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl -195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de -144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about +78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith +192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir +15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu +155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra PREHOOK: query: explain select * from part_null where p_name IN (select p_name from part where part.p_type = part_null.p_type) AND p_brand NOT IN (select p_type from part where part.p_size = part_null.p_size) PREHOOK: type: QUERY POSTHOOK: query: explain select * from part_null where p_name IN (select p_name from part where part.p_type = part_null.p_type) AND p_brand NOT IN (select p_type from part where part.p_size = part_null.p_size) @@ -2472,12 +2446,11 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (ONE_TO_ONE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 9 (ONE_TO_ONE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE) - Reducer 8 <- Map 7 (SIMPLE_EDGE) - Reducer 9 <- Map 7 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 8 (ONE_TO_ONE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) + Reducer 8 <- Map 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2485,16 +2458,19 @@ STAGE PLANS: TableScan alias: part_null Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Filter Operator + predicate: (p_name is not null and p_type is not null) (type: boolean) Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col4 (type: string) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col4 (type: string) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Output Operator + key expressions: _col1 (type: string), _col4 (type: string) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col4 (type: string) + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs Map 5 @@ -2503,21 +2479,25 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: p_type is not null (type: boolean) + predicate: (p_name is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: p_name (type: string), p_type (type: string) - mode: hash + Select Operator + expressions: p_name (type: string), p_type (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 2925 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 5850 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 2925 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 13 Data size: 2925 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 7 + Map 6 Map Operator Tree: TableScan alias: part @@ -2554,7 +2534,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col1 (type: string), _col4 (type: string) 1 _col0 (type: string), _col1 (type: string) @@ -2575,14 +2555,14 @@ STAGE PLANS: keys: 0 _col5 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11 Statistics: Num rows: 15 Data size: 3538 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col3 (type: string), _col5 (type: int) sort order: ++ Map-reduce partition columns: _col3 (type: string), _col5 (type: int) Statistics: Num rows: 15 Data size: 3538 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: bigint), _col13 (type: bigint) + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col10 (type: bigint), _col11 (type: bigint) Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -2592,10 +2572,10 @@ STAGE PLANS: keys: 0 _col3 (type: string), _col5 (type: int) 1 _col0 (type: string), _col1 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col16 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11, _col14 Statistics: Num rows: 16 Data size: 3891 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (not CASE WHEN ((_col12 = 0)) THEN (false) WHEN (_col12 is null) THEN (false) WHEN (_col16 is not null) THEN (true) WHEN (_col3 is null) THEN (null) WHEN ((_col13 < _col12)) THEN (true) ELSE (false) END) (type: boolean) + predicate: (not CASE WHEN ((_col10 = 0)) THEN (false) WHEN (_col10 is null) THEN (false) WHEN (_col14 is not null) THEN (true) WHEN (_col3 is null) THEN (null) WHEN ((_col11 < _col10)) THEN (true) ELSE (false) END) (type: boolean) Statistics: Num rows: 8 Data size: 1945 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) @@ -2608,20 +2588,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 2925 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 13 Data size: 2925 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 8 + Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -2636,7 +2603,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) - Reducer 9 + Reducer 8 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -2714,8 +2681,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2723,16 +2689,19 @@ STAGE PLANS: TableScan alias: part_null Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Filter Operator + predicate: (p_name is not null and p_type is not null) (type: boolean) Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col4 (type: string) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col4 (type: string) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Output Operator + key expressions: _col1 (type: string), _col4 (type: string) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col4 (type: string) + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -2741,7 +2710,7 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 8242 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: p_type is not null (type: boolean) + predicate: (p_brand is not null and p_name is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 8242 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_name (type: string), p_brand (type: string), p_type (type: string) @@ -2761,18 +2730,22 @@ STAGE PLANS: alias: pp Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: p_type is not null (type: boolean) + predicate: (p_brand is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: p_type (type: string), p_brand (type: string) - mode: hash + Select Operator + expressions: p_type (type: string), p_brand (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 5096 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -2798,10 +2771,10 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col2 (type: string), _col1 (type: string) - 1 _col1 (type: string), _col0 (type: string) + 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col2 Statistics: Num rows: 14 Data size: 3150 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -2818,23 +2791,6 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 7 Data size: 1575 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: string) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: string) - Statistics: Num rows: 13 Data size: 2548 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -2878,7 +2834,7 @@ POSTHOOK: Input: default@part_null 17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve 33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful 78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith -Warning: Shuffle Join MERGEJOIN[61][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 7' is a cross product +Warning: Shuffle Join MERGEJOIN[62][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 7' is a cross product PREHOOK: query: explain select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and @@ -2933,7 +2889,7 @@ STAGE PLANS: alias: li Statistics: Num rows: 100 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((l_linenumber = 1) and l_partkey is not null) (type: boolean) + predicate: ((l_linenumber = 1) and l_orderkey is not null and l_partkey is not null) (type: boolean) Statistics: Num rows: 14 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int), 1 (type: int) @@ -2953,16 +2909,16 @@ STAGE PLANS: alias: lineitem Statistics: Num rows: 100 Data size: 10400 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((l_linenumber = l_linenumber) and (l_shipmode = 'AIR')) (type: boolean) - Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((l_linenumber = 1) and (l_shipmode = 'AIR') and l_orderkey is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: l_orderkey (type: int), l_linenumber (type: int), l_quantity (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 721 Basic stats: COMPLETE Column stats: COMPLETE + expressions: l_orderkey (type: int), l_quantity (type: double) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 2 Data size: 206 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 7 Data size: 721 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: double) + Statistics: Num rows: 2 Data size: 206 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: double) Execution mode: llap LLAP IO: no inputs Map 9 @@ -3064,14 +3020,14 @@ STAGE PLANS: 0 _col1 (type: int), _col4 (type: int) 1 _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col3 - Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col3 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3085,14 +3041,14 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0, _col1, _col2, _col4, _col5 - Statistics: Num rows: 7 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col2, _col4, _col5 + Statistics: Num rows: 2 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: double) sort order: + Map-reduce partition columns: _col2 (type: double) - Statistics: Num rows: 7 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col1 (type: int), _col4 (type: bigint), _col5 (type: bigint) + Statistics: Num rows: 2 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col4 (type: bigint), _col5 (type: bigint) Reducer 8 Execution mode: llap Reduce Operator Tree: @@ -3102,25 +3058,25 @@ STAGE PLANS: keys: 0 _col2 (type: double) 1 _col0 (type: double) - outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col7 - Statistics: Num rows: 7 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col2, _col4, _col5, _col7 + Statistics: Num rows: 2 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (not CASE WHEN ((_col4 = 0)) THEN (false) WHEN (_col7 is not null) THEN (true) WHEN (_col2 is null) THEN (null) WHEN ((_col5 < _col4)) THEN (true) ELSE (false) END) (type: boolean) - Statistics: Num rows: 4 Data size: 136 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((_col4 = 0) or (_col7 is null and _col2 is not null and (_col5 >= _col4))) (type: boolean) + Statistics: Num rows: 2 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), _col1 (type: int) + expressions: _col0 (type: int), 1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -3128,7 +3084,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[61][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 7' is a cross product +Warning: Shuffle Join MERGEJOIN[62][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 7' is a cross product PREHOOK: query: select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and @@ -3180,17 +3136,20 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key > '9') (type: boolean) + predicate: ((key > '9') and value is not null) (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) @@ -3210,7 +3169,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: value is not null (type: boolean) + predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) @@ -3686,7 +3645,7 @@ POSTHOOK: Input: default@src 409 val_409 3 119 val_119 3 35 val_35 3 -Warning: Shuffle Join MERGEJOIN[28][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[30][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product PREHOOK: query: explain select * from part where p_name IN (select p_name from part p where part.p_type <> '1') PREHOOK: type: QUERY POSTHOOK: query: explain select * from part where p_name IN (select p_name from part p where part.p_type <> '1') @@ -3710,16 +3669,19 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Filter Operator + predicate: ((p_type <> '1') and p_name is not null) (type: boolean) Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string), _col4 (type: string) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col4 (type: string) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Output Operator + key expressions: _col1 (type: string), _col4 (type: string) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col4 (type: string) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -3727,14 +3689,17 @@ STAGE PLANS: TableScan alias: p Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_name (type: string) - outputColumnNames: _col0 + Filter Operator + predicate: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: + Select Operator + expressions: p_name (type: string) + outputColumnNames: _col0 Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs Map 5 @@ -3815,7 +3780,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[28][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[30][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product PREHOOK: query: select * from part where p_name IN (select p_name from part p where part.p_type <> '1') PREHOOK: type: QUERY PREHOOK: Input: default@part diff --git a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out index 8e2ca937af..2dd65b44be 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out @@ -341,23 +341,23 @@ STAGE PLANS: Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col2 (type: int) Filter Operator - predicate: (p_mfgr = p_mfgr) (type: boolean) - Statistics: Num rows: 13 Data size: 2899 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_mfgr is not null (type: boolean) + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: p_mfgr (type: string), p_size (type: int) sort order: ++ Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 13 Data size: 2899 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: p_name (type: string) Filter Operator - predicate: (p_mfgr = p_mfgr) (type: boolean) - Statistics: Num rows: 13 Data size: 2899 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_mfgr is not null (type: boolean) + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: p_mfgr (type: string), p_size (type: int) sort order: ++ Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 13 Data size: 2899 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: p_name (type: string) Execution mode: llap @@ -372,12 +372,12 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col4, _col5 - Statistics: Num rows: 26 Data size: 5894 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 5974 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 26 Data size: 5894 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 5974 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: int), _col4 (type: bigint), _col5 (type: bigint) Reducer 3 Execution mode: llap @@ -389,10 +389,10 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col8 - Statistics: Num rows: 26 Data size: 5906 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 5994 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (not CASE WHEN ((_col4 = 0)) THEN (false) WHEN (_col4 is null) THEN (false) WHEN (_col8 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col5 < _col4)) THEN (true) ELSE (false) END) (type: boolean) - Statistics: Num rows: 13 Data size: 2955 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 3007 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col0 (type: string), _col2 (type: int) outputColumnNames: _col0, _col1, _col2 @@ -410,7 +410,7 @@ STAGE PLANS: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 13 Data size: 6383 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -431,25 +431,25 @@ STAGE PLANS: window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 13 Data size: 6383 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (rank_window_0 <= 2) (type: boolean) - Statistics: Num rows: 4 Data size: 1964 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 3928 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 1964 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 3928 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), count(_col0) keys: _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 5 Execution mode: llap @@ -459,12 +459,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 6 Execution mode: llap @@ -472,7 +472,7 @@ STAGE PLANS: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 13 Data size: 6383 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -493,24 +493,24 @@ STAGE PLANS: window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 13 Data size: 6383 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (rank_window_0 <= 2) (type: boolean) - Statistics: Num rows: 4 Data size: 1964 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 3928 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 1964 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 3928 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 438 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 876 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 2 Data size: 438 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 876 Basic stats: COMPLETE Column stats: COMPLETE Reducer 7 Execution mode: llap Reduce Operator Tree: @@ -518,19 +518,19 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 438 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 876 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 2 Data size: 438 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 876 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), true (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 446 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 892 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 2 Data size: 446 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 892 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: boolean) Stage: Stage-0 @@ -925,25 +925,25 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_mfgr = p_mfgr) (type: boolean) - Statistics: Num rows: 13 Data size: 1326 Basic stats: COMPLETE Column stats: COMPLETE + predicate: p_mfgr is not null (type: boolean) + Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: p_mfgr (type: string), p_size (type: int) sort order: ++ Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 13 Data size: 1326 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Reduce Output Operator key expressions: p_mfgr (type: string), p_size (type: int) sort order: ++ Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 13 Data size: 1326 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Reduce Output Operator key expressions: p_mfgr (type: string), p_size (type: int) sort order: ++ Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 13 Data size: 1326 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 2652 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: llap LLAP IO: no inputs @@ -953,7 +953,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) outputColumnNames: _col2, _col5 - Statistics: Num rows: 13 Data size: 4810 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 9620 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -974,25 +974,25 @@ STAGE PLANS: window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 13 Data size: 4810 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 9620 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (rank_window_0 <= 2) (type: boolean) - Statistics: Num rows: 4 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 2960 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col5 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 2960 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(_col1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int) Reducer 11 Execution mode: llap @@ -1002,23 +1002,23 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int), _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), true (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string), _col0 (type: int) sort order: ++ Map-reduce partition columns: _col1 (type: string), _col0 (type: int) - Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: boolean) Reducer 2 Execution mode: llap @@ -1030,19 +1030,19 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col4 - Statistics: Num rows: 26 Data size: 5846 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 5886 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (sq_count_check(_col4, true) > 0) (type: boolean) - Statistics: Num rows: 8 Data size: 1808 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 1816 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8 Data size: 1808 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 1816 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 8 Data size: 1808 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 1816 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col2 (type: int) Reducer 3 Execution mode: llap @@ -1071,10 +1071,10 @@ STAGE PLANS: 0 _col1 (type: string), _col2 (type: int) 1 _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col10 - Statistics: Num rows: 8 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 1924 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (not CASE WHEN ((_col6 = 0)) THEN (false) WHEN (_col6 is null) THEN (false) WHEN (_col10 is not null) THEN (true) WHEN (_col2 is null) THEN (null) WHEN ((_col7 < _col6)) THEN (true) ELSE (false) END) (type: boolean) - Statistics: Num rows: 4 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 964 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col0 (type: string), _col2 (type: int) outputColumnNames: _col0, _col1, _col2 @@ -1092,7 +1092,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) outputColumnNames: _col2, _col5 - Statistics: Num rows: 13 Data size: 4810 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 9620 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -1113,24 +1113,24 @@ STAGE PLANS: window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 13 Data size: 4810 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 9620 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (rank_window_0 <= 2) (type: boolean) - Statistics: Num rows: 4 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 2960 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 2960 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 98 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 98 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE Reducer 7 Execution mode: llap Reduce Operator Tree: @@ -1138,18 +1138,18 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 98 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 106 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 8 Execution mode: llap @@ -1157,7 +1157,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) outputColumnNames: _col2, _col5 - Statistics: Num rows: 13 Data size: 4810 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 9620 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -1178,25 +1178,25 @@ STAGE PLANS: window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 13 Data size: 4810 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 9620 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (rank_window_0 <= 2) (type: boolean) - Statistics: Num rows: 4 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 2960 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col5 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 1480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 2960 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(_col1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int) Reducer 9 Execution mode: llap @@ -1206,18 +1206,18 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), count(_col1) keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Stage: Stage-0 @@ -3017,30 +3017,30 @@ STAGE PLANS: alias: p Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((p_partkey = p_partkey) and (p_size = p_size)) (type: boolean) - Statistics: Num rows: 6 Data size: 774 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (p_partkey is not null and p_size is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), count(p_name) keys: p_partkey (type: int), p_size (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 3 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint), _col3 (type: bigint) Group By Operator keys: p_partkey (type: int), p_name (type: string), p_size (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 387 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1677 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: int) - Statistics: Num rows: 3 Data size: 387 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1677 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -3053,12 +3053,12 @@ STAGE PLANS: 0 _col0 (type: int), _col5 (type: int) 1 _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col11, _col12 - Statistics: Num rows: 26 Data size: 16158 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 16318 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col5 (type: int) - Statistics: Num rows: 26 Data size: 16158 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 16318 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col11 (type: bigint), _col12 (type: bigint) Reducer 3 Execution mode: llap @@ -3070,10 +3070,10 @@ STAGE PLANS: 0 _col0 (type: int), _col1 (type: string), _col5 (type: int) 1 _col1 (type: int), _col0 (type: string), _col2 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col11, _col12, _col16 - Statistics: Num rows: 26 Data size: 16174 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 16374 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (not CASE WHEN ((_col11 = 0)) THEN (false) WHEN (_col11 is null) THEN (false) WHEN (_col16 is not null) THEN (true) WHEN (_col1 is null) THEN (null) WHEN ((_col12 < _col11)) THEN (true) ELSE (false) END) (type: boolean) - Statistics: Num rows: 13 Data size: 8087 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 8187 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 @@ -3093,12 +3093,12 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 3 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 312 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint), _col3 (type: bigint) Reducer 6 Execution mode: llap @@ -3107,23 +3107,23 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 387 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1677 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col0 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 387 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1677 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 3 Data size: 387 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1677 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), true (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 399 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1729 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int), _col0 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col1 (type: int), _col0 (type: string), _col2 (type: int) - Statistics: Num rows: 3 Data size: 399 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 1729 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: boolean) Stage: Stage-0 diff --git a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out index c89d053b4a..06a929dd0a 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out @@ -2068,7 +2068,7 @@ POSTHOOK: Input: default@part_null 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -Warning: Shuffle Join MERGEJOIN[29][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[27][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select * from part where p_brand <> (select min(p_brand) from part ) AND p_size IN (select (p_size) from part p where p.p_type = part.p_type ) AND p_size <> 340 PREHOOK: type: QUERY POSTHOOK: query: explain select * from part where p_brand <> (select min(p_brand) from part ) AND p_size IN (select (p_size) from part p where p.p_type = part.p_type ) AND p_size <> 340 @@ -2083,9 +2083,8 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 5 (XPROD_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) - Reducer 7 <- Map 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2094,7 +2093,7 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_size <> 340) (type: boolean) + predicate: ((p_size <> 340) and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) @@ -2132,18 +2131,22 @@ STAGE PLANS: alias: p Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: p_type is not null (type: boolean) + predicate: ((p_size <> 340) and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: p_type (type: string), p_size (type: int) - mode: hash + Select Operator + expressions: p_type (type: string), p_size (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 26 Data size: 2808 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -2169,10 +2172,10 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col4 (type: string), _col5 (type: int) - 1 _col1 (type: string), _col0 (type: int) + 1 _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 14 Data size: 8666 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -2194,23 +2197,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) - Reducer 7 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: int), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) - Statistics: Num rows: 13 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -2218,7 +2204,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[29][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[27][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select * from part where p_brand <> (select min(p_brand) from part ) AND p_size IN (select (p_size) from part p where p.p_type = part.p_type ) AND p_size <> 340 PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -2290,19 +2276,19 @@ STAGE PLANS: alias: p Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((p_partkey = p_partkey) and (p_size = p_size)) (type: boolean) - Statistics: Num rows: 6 Data size: 774 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (p_partkey is not null and p_size is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(p_name) keys: p_partkey (type: int), p_size (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -2316,10 +2302,10 @@ STAGE PLANS: 0 _col0 (type: int), _col5 (type: int) 1 _col2 (type: int), _col3 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 26 Data size: 16142 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 16262 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (UDFToLong(_col5) <> CASE WHEN (_col10 is null) THEN (0) ELSE (_col9) END) (type: boolean) - Statistics: Num rows: 26 Data size: 16142 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 16262 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 @@ -2339,16 +2325,16 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: bigint), true (type: boolean), _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: int), _col3 (type: int) sort order: ++ Map-reduce partition columns: _col2 (type: int), _col3 (type: int) - Statistics: Num rows: 3 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: boolean) Stage: Stage-0 @@ -3851,12 +3837,12 @@ STAGE PLANS: alias: lineitem Statistics: Num rows: 100 Data size: 9600 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((l_linenumber = l_linenumber) and (l_shipmode = 'AIR')) (type: boolean) - Statistics: Num rows: 7 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((l_shipmode = 'AIR') and l_linenumber is not null) (type: boolean) + Statistics: Num rows: 14 Data size: 1344 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: l_orderkey (type: int), l_linenumber (type: int) outputColumnNames: l_orderkey, l_linenumber - Statistics: Num rows: 7 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 14 Data size: 1344 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(l_orderkey) keys: l_linenumber (type: int) @@ -4060,12 +4046,12 @@ STAGE PLANS: alias: lineitem Statistics: Num rows: 100 Data size: 9600 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((l_linenumber = l_linenumber) and (l_shipmode = 'AIR')) (type: boolean) - Statistics: Num rows: 7 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((l_shipmode = 'AIR') and l_linenumber is not null) (type: boolean) + Statistics: Num rows: 14 Data size: 1344 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: l_orderkey (type: int), l_linenumber (type: int) outputColumnNames: l_orderkey, l_linenumber - Statistics: Num rows: 7 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 14 Data size: 1344 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(l_orderkey) keys: l_linenumber (type: int) @@ -4379,16 +4365,19 @@ STAGE PLANS: TableScan alias: part_null Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Filter Operator + predicate: (p_name is not null and p_type is not null) (type: boolean) Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col4 (type: string) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col4 (type: string) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Output Operator + key expressions: _col1 (type: string), _col4 (type: string) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col4 (type: string) + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -4397,7 +4386,7 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 8242 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: p_type is not null (type: boolean) + predicate: (p_name is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 8242 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_name (type: string), p_brand (type: string), p_type (type: string) diff --git a/ql/src/test/results/clientpositive/llap/subquery_select.q.out b/ql/src/test/results/clientpositive/llap/subquery_select.q.out index 118f6ebccf..514a7889b3 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_select.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_select.q.out @@ -1776,7 +1776,7 @@ POSTHOOK: Input: default@part 2 46 46 46 23 46 -Warning: Shuffle Join MERGEJOIN[30][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[31][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select * from src b @@ -1804,10 +1804,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE) - Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) - Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (XPROD_EDGE), Reducer 5 (XPROD_EDGE) + Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1815,29 +1814,32 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key > '9') and (value = value)) (type: boolean) - Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((key > '9') and value is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: string) outputColumnNames: _col1 - Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs - Map 5 + Map 4 Map Operator Tree: TableScan alias: src @@ -1862,15 +1864,15 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 66 Data size: 11748 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 134 Data size: 23852 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 66 Data size: 11748 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 134 Data size: 23852 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1880,36 +1882,27 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Outer Join 0 to 1 + Inner Join 0 to 1 keys: 0 1 outputColumnNames: _col1, _col2 - Statistics: Num rows: 83 Data size: 22825 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col2 (type: string), _col1 (type: string) - mode: hash + Statistics: Num rows: 166 Data size: 45650 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 41 Data size: 11275 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 41 Data size: 11275 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 41 Data size: 11275 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 41 Data size: 11275 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 6 + Statistics: Num rows: 166 Data size: 45650 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 22825 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 83 Data size: 22825 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -1917,10 +1910,13 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: + Filter Operator + predicate: _col0 is not null (type: boolean) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) Stage: Stage-0 Fetch Operator @@ -1928,7 +1924,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[30][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[31][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select * from src b where b.key in @@ -1988,27 +1984,30 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key > '9') and (value = value)) (type: boolean) - Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((key > '9') and value is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: string) outputColumnNames: _col1 - Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE @@ -2036,10 +2035,10 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 66 Data size: 11748 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 108 Data size: 19224 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 66 Data size: 11748 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 108 Data size: 19224 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2049,26 +2048,26 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Outer Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col1 (type: string) - 1 _col2 (type: string) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 83 Data size: 20337 Basic stats: COMPLETE Column stats: COMPLETE + 1 _col1 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 134 Data size: 36850 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: CASE WHEN (_col3 is null) THEN (null) ELSE (_col2) END (type: string), _col1 (type: string) + expressions: _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 22825 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 134 Data size: 36850 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 41 Data size: 11275 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 67 Data size: 18425 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 41 Data size: 11275 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 67 Data size: 18425 Basic stats: COMPLETE Column stats: COMPLETE Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -2078,16 +2077,19 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 68750 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: string), true (type: boolean), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 69750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col2 (type: string) - sort order: + - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 250 Data size: 69750 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: boolean) + Filter Operator + predicate: _col1 is not null (type: boolean) + Statistics: Num rows: 250 Data size: 68750 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: string), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 68750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 250 Data size: 68750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/subquery_views.q.out b/ql/src/test/results/clientpositive/llap/subquery_views.q.out index a9a81133b5..af695691a7 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_views.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_views.q.out @@ -157,33 +157,33 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key = key) and (value = value) and (value > 'val_11')) (type: boolean) - Statistics: Num rows: 41 Data size: 7298 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((value > 'val_11') and key is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), count(key) keys: key (type: string), value (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 20 Data size: 3880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 16102 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 20 Data size: 3880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 16102 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint), _col3 (type: bigint) Filter Operator - predicate: ((key = key) and (value = value) and (value > 'val_11')) (type: boolean) - Statistics: Num rows: 41 Data size: 7298 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((value > 'val_11') and key is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: key (type: string), value (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 3560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 20 Data size: 3560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < '11') (type: boolean) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE @@ -197,33 +197,33 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((key = key) and (value = value) and (value > 'val_11')) (type: boolean) - Statistics: Num rows: 41 Data size: 7298 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((value > 'val_11') and key is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), count(key) keys: key (type: string), value (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 20 Data size: 3880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 16102 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 20 Data size: 3880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 16102 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint), _col3 (type: bigint) Filter Operator - predicate: ((key = key) and (value = value) and (value > 'val_11')) (type: boolean) - Statistics: Num rows: 41 Data size: 7298 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((value > 'val_11') and key is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: key (type: string), value (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 3560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 20 Data size: 3560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 11 @@ -260,12 +260,12 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col2, _col3, _col4 - Statistics: Num rows: 16 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 67 Data size: 12194 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: string), _col2 (type: string) sort order: ++ Map-reduce partition columns: _col4 (type: string), _col2 (type: string) - Statistics: Num rows: 16 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 67 Data size: 12194 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: boolean) Reducer 12 Execution mode: llap @@ -305,19 +305,19 @@ STAGE PLANS: 1 _col0 (type: string), _col1 (type: string) 2 _col4 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col4, _col5, _col9 - Statistics: Num rows: 19 Data size: 3762 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 87 Data size: 17226 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: CASE WHEN ((_col4 = 0)) THEN (true) WHEN (_col4 is null) THEN (true) WHEN (_col9 is not null) THEN (false) WHEN (_col0 is null) THEN (null) WHEN ((_col5 < _col4)) THEN (false) ELSE (true) END (type: boolean) - Statistics: Num rows: 9 Data size: 1782 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 43 Data size: 8514 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 9 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 43 Data size: 7654 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 9 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 43 Data size: 7654 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Reducer 3 Execution mode: llap @@ -329,10 +329,10 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 9 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 43 Data size: 7654 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 9 Data size: 1602 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 43 Data size: 7654 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -345,12 +345,12 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 20 Data size: 3880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 16102 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 20 Data size: 3880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 16102 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint), _col3 (type: bigint) Reducer 5 Execution mode: llap @@ -359,16 +359,16 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 3560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), true (type: boolean) outputColumnNames: _col0, _col2, _col3 - Statistics: Num rows: 20 Data size: 3640 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 15106 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 20 Data size: 3640 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 15106 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string), _col3 (type: boolean) Reducer 6 Execution mode: llap @@ -380,12 +380,12 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col2, _col3, _col4 - Statistics: Num rows: 16 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 67 Data size: 12194 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: string), _col2 (type: string) sort order: ++ Map-reduce partition columns: _col4 (type: string), _col2 (type: string) - Statistics: Num rows: 16 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 67 Data size: 12194 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: boolean) Reducer 7 Execution mode: llap @@ -399,24 +399,24 @@ STAGE PLANS: 1 _col0 (type: string), _col1 (type: string) 2 _col4 (type: string), _col2 (type: string) outputColumnNames: _col0, _col4, _col5, _col9 - Statistics: Num rows: 19 Data size: 2033 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 87 Data size: 9309 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: CASE WHEN ((_col4 = 0)) THEN (true) WHEN (_col4 is null) THEN (true) WHEN (_col9 is not null) THEN (false) WHEN (_col0 is null) THEN (null) WHEN ((_col5 < _col4)) THEN (false) ELSE (true) END (type: boolean) - Statistics: Num rows: 9 Data size: 963 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 43 Data size: 4601 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 9 Data size: 783 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 43 Data size: 3741 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 696 Basic stats: COMPLETE Column stats: COMPLETE Reducer 8 Execution mode: llap Reduce Operator Tree: @@ -425,12 +425,12 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 20 Data size: 3880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 16102 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 20 Data size: 3880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 16102 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint), _col3 (type: bigint) Reducer 9 Execution mode: llap @@ -439,16 +439,16 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 3560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), true (type: boolean) outputColumnNames: _col0, _col2, _col3 - Statistics: Num rows: 20 Data size: 3640 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 15106 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 20 Data size: 3640 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 15106 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string), _col3 (type: boolean) Stage: Stage-0 diff --git a/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out b/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out index 4e6f00f6b7..37821fb6a0 100644 --- a/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out @@ -356,8 +356,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 3:int, val 1), SelectColumnIsNotNull(col 1:int)) - predicate: ((l_linenumber = 1) and l_partkey is not null) (type: boolean) + predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 3:int, val 1), SelectColumnIsNotNull(col 1:int), SelectColumnIsNotNull(col 0:int)) + predicate: ((l_linenumber = 1) and l_orderkey is not null and l_partkey is not null) (type: boolean) Statistics: Num rows: 14 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int), 1 (type: int) @@ -401,29 +401,30 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: FilterStringGroupColEqualStringScalar(col 14:string, val AIR), FilterLongColEqualLongColumn(col 3:int, col 3:int)) - predicate: ((l_linenumber = l_linenumber) and (l_shipmode = 'AIR')) (type: boolean) - Statistics: Num rows: 7 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + predicateExpression: FilterExprAndExpr(children: FilterStringGroupColEqualStringScalar(col 14:string, val AIR), FilterLongColEqualLongScalar(col 3:int, val 1), SelectColumnIsNotNull(col 0:int)) + predicate: ((l_linenumber = 1) and (l_shipmode = 'AIR') and l_orderkey is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: l_orderkey (type: int), l_linenumber (type: int) + expressions: l_orderkey (type: int), 1 (type: int) outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 3] - Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + projectedOutputColumnNums: [0, 17] + selectExpressions: ConstantVectorExpression(val 1) -> 17:int + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int, col 3:int + keyExpressions: col 0:int, col 17:int native: false vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ @@ -432,7 +433,7 @@ STAGE PLANS: className: VectorReduceSinkMultiKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: @@ -493,7 +494,7 @@ STAGE PLANS: outputColumnNames: _col0, _col3 input vertices: 1 Map 4 - Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col3 (type: int) outputColumnNames: _col0, _col1 @@ -501,13 +502,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 2] - Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/masking_12.q.out b/ql/src/test/results/clientpositive/masking_12.q.out index 540c53e825..c7a7b1bca5 100644 --- a/ql/src/test/results/clientpositive/masking_12.q.out +++ b/ql/src/test/results/clientpositive/masking_12.q.out @@ -125,9 +125,8 @@ STAGE PLANS: properties: insideView TRUE Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: key + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: key (type: int) @@ -158,16 +157,19 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) TableScan Reduce Output Operator key expressions: UDFToDouble(_col0) (type: double) @@ -204,16 +206,19 @@ STAGE PLANS: properties: insideView TRUE Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col0 (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col0 (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan Reduce Output Operator key expressions: UDFToDouble(_col0) (type: double), _col1 (type: int) diff --git a/ql/src/test/results/clientpositive/masking_3.q.out b/ql/src/test/results/clientpositive/masking_3.q.out index 1114c80676..3a3547dacc 100644 --- a/ql/src/test/results/clientpositive/masking_3.q.out +++ b/ql/src/test/results/clientpositive/masking_3.q.out @@ -27,9 +27,8 @@ STAGE PLANS: TableScan alias: masking_test_subq Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: key + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: key (type: int) @@ -60,16 +59,19 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) TableScan Reduce Output Operator key expressions: UDFToDouble(_col0) (type: double) @@ -104,16 +106,19 @@ STAGE PLANS: TableScan alias: masking_test_subq Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col0 (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col0 (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan Reduce Output Operator key expressions: UDFToDouble(_col0) (type: double), _col1 (type: int) @@ -670,26 +675,25 @@ STAGE PLANS: TableScan alias: masking_test_subq Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: key - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > 0) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: key (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -703,22 +707,25 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) TableScan Reduce Output Operator key expressions: UDFToDouble(_col0) (type: double) sort order: + Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Reduce Operator Tree: Join Operator @@ -1313,26 +1320,25 @@ STAGE PLANS: TableScan alias: masking_test_subq Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: key - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > 0) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: key (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -1346,22 +1352,25 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) TableScan Reduce Output Operator key expressions: UDFToDouble(_col0) (type: double) sort order: + Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Reduce Operator Tree: Join Operator @@ -1955,26 +1964,25 @@ STAGE PLANS: TableScan alias: masking_test_subq Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: key - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > 0) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: key (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -1988,22 +1996,25 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) TableScan Reduce Output Operator key expressions: UDFToDouble(_col0) (type: double) sort order: + Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Reduce Operator Tree: Join Operator @@ -2603,9 +2614,8 @@ STAGE PLANS: TableScan alias: masking_test_subq Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: key + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: key (type: int) @@ -2636,16 +2646,19 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) TableScan Reduce Output Operator key expressions: UDFToDouble(_col0) (type: double) @@ -6913,26 +6926,25 @@ STAGE PLANS: TableScan alias: masking_test_subq Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: key - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > 0) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: key (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -6946,22 +6958,25 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) TableScan Reduce Output Operator key expressions: UDFToDouble(_col0) (type: double) sort order: + Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Reduce Operator Tree: Join Operator @@ -7556,26 +7571,25 @@ STAGE PLANS: TableScan alias: masking_test_subq Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: key - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > 0) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: key (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -7589,22 +7603,25 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) TableScan Reduce Output Operator key expressions: UDFToDouble(_col0) (type: double) sort order: + Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Reduce Operator Tree: Join Operator diff --git a/ql/src/test/results/clientpositive/masking_4.q.out b/ql/src/test/results/clientpositive/masking_4.q.out index 527da21610..e172201785 100644 --- a/ql/src/test/results/clientpositive/masking_4.q.out +++ b/ql/src/test/results/clientpositive/masking_4.q.out @@ -165,9 +165,8 @@ STAGE PLANS: TableScan alias: masking_test_subq Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: key + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: key (type: int) @@ -198,16 +197,19 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) TableScan Reduce Output Operator key expressions: UDFToDouble(_col0) (type: double) @@ -242,16 +244,19 @@ STAGE PLANS: TableScan alias: masking_test_subq Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col0 (type: int) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: UDFToDouble(_col0) (type: double), _col0 (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) TableScan Reduce Output Operator key expressions: UDFToDouble(_col0) (type: double), _col1 (type: int) diff --git a/ql/src/test/results/clientpositive/perf/spark/query10.q.out b/ql/src/test/results/clientpositive/perf/spark/query10.q.out index eb3a2f6699..bdc247da2b 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query10.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query10.q.out @@ -126,7 +126,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 12 + Map 10 Map Operator Tree: TableScan alias: date_dim @@ -149,7 +149,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 15 + Map 13 Map Operator Tree: TableScan alias: date_dim @@ -172,7 +172,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 18 + Map 16 Map Operator Tree: TableScan alias: date_dim @@ -194,15 +194,13 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 11 <- Map 10 (GROUP, 169) - Reducer 14 <- Map 13 (GROUP, 437) - Reducer 17 <- Map 16 (GROUP, 336) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 697), Map 8 (PARTITION-LEVEL SORT, 697) - Reducer 3 <- Map 9 (PARTITION-LEVEL SORT, 597), Reducer 2 (PARTITION-LEVEL SORT, 597) - Reducer 4 <- Reducer 11 (PARTITION-LEVEL SORT, 953), Reducer 14 (PARTITION-LEVEL SORT, 953), Reducer 3 (PARTITION-LEVEL SORT, 953) - Reducer 5 <- Reducer 17 (PARTITION-LEVEL SORT, 648), Reducer 4 (PARTITION-LEVEL SORT, 648) - Reducer 6 <- Reducer 5 (GROUP, 529) - Reducer 7 <- Reducer 6 (SORT, 1) + Reducer 12 <- Map 11 (GROUP, 169) + Reducer 15 <- Map 14 (GROUP, 336) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 697), Map 7 (PARTITION-LEVEL SORT, 697) + Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 597), Reducer 2 (PARTITION-LEVEL SORT, 597) + Reducer 4 <- Map 9 (PARTITION-LEVEL SORT, 1009), Reducer 12 (PARTITION-LEVEL SORT, 1009), Reducer 15 (PARTITION-LEVEL SORT, 1009), Reducer 3 (PARTITION-LEVEL SORT, 1009) + Reducer 5 <- Reducer 4 (GROUP, 1009) + Reducer 6 <- Reducer 5 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -211,7 +209,7 @@ STAGE PLANS: alias: c Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (c_current_addr_sk is not null and c_current_cdemo_sk is not null) (type: boolean) + predicate: (c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_customer_sk is not null) (type: boolean) Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c_customer_sk (type: int), c_current_cdemo_sk (type: int), c_current_addr_sk (type: int) @@ -223,7 +221,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int) - Map 10 + Map 11 Map Operator Tree: TableScan alias: web_sales @@ -243,7 +241,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1 input vertices: - 1 Map 12 + 1 Map 13 Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col1 (type: int) @@ -257,41 +255,7 @@ STAGE PLANS: Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Map 13 - Map Operator Tree: - TableScan - alias: store_sales - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - input vertices: - 1 Map 15 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work - Map 16 + Map 14 Map Operator Tree: TableScan alias: catalog_sales @@ -311,7 +275,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1 input vertices: - 1 Map 18 + 1 Map 16 Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col1 (type: int) @@ -325,7 +289,7 @@ STAGE PLANS: Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Map 8 + Map 7 Map Operator Tree: TableScan alias: ca @@ -342,7 +306,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 20000000 Data size: 20297597642 Basic stats: COMPLETE Column stats: NONE - Map 9 + Map 8 Map Operator Tree: TableScan alias: customer_demographics @@ -360,7 +324,45 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: int) - Reducer 11 + Map 9 + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + input vertices: + 1 Map 10 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Local Work: + Map Reduce Local Work + Reducer 12 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) @@ -377,19 +379,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean) - Reducer 14 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE - Reducer 17 + Reducer 15 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) @@ -442,78 +432,60 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Left Outer Join 0 to 1 - Inner Join 0 to 2 + Left Semi Join 0 to 1 + Left Outer Join 0 to 2 + Left Outer Join 0 to 3 keys: 0 _col0 (type: int) 1 _col0 (type: int) 2 _col0 (type: int) - outputColumnNames: _col0, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col15 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: int), _col12 (type: int), _col13 (type: int), _col15 (type: boolean) - outputColumnNames: _col0, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col16 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: int), _col12 (type: int), _col13 (type: int), _col16 (type: boolean) - Reducer 5 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col16, _col18 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + 3 _col0 (type: int) + outputColumnNames: _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col15, _col17 + Statistics: Num rows: 2090864244 Data size: 184456650574 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col16 is not null or _col18 is not null) (type: boolean) - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + predicate: (_col15 is not null or _col17 is not null) (type: boolean) + Statistics: Num rows: 2090864244 Data size: 184456650574 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: int), _col12 (type: int), _col13 (type: int) outputColumnNames: _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2090864244 Data size: 184456650574 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: int), _col12 (type: int), _col13 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2090864244 Data size: 184456650574 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: int) sort order: ++++++++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: int) - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2090864244 Data size: 184456650574 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col8 (type: bigint) - Reducer 6 + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: int), KEY._col4 (type: string), KEY._col5 (type: int), KEY._col6 (type: int), KEY._col7 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1045432122 Data size: 92228325287 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col8 (type: bigint), _col3 (type: int), _col4 (type: string), _col5 (type: int), _col6 (type: int), _col7 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6, _col8, _col10, _col12 - Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1045432122 Data size: 92228325287 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: int), _col6 (type: string), _col8 (type: int), _col10 (type: int), _col12 (type: int) sort order: ++++++++ - Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1045432122 Data size: 92228325287 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: bigint) - Reducer 7 + Reducer 6 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col0 (type: bigint), KEY.reducesinkkey3 (type: int), VALUE._col0 (type: bigint), KEY.reducesinkkey4 (type: string), VALUE._col0 (type: bigint), KEY.reducesinkkey5 (type: int), VALUE._col0 (type: bigint), KEY.reducesinkkey6 (type: int), VALUE._col0 (type: bigint), KEY.reducesinkkey7 (type: int), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1045432122 Data size: 92228325287 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/perf/spark/query16.q.out b/ql/src/test/results/clientpositive/perf/spark/query16.q.out index b74d721d41..fd31b22e7a 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query16.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query16.q.out @@ -1,4 +1,3 @@ -Warning: Shuffle Join JOIN[33][tables = [$hdt$_2, $hdt$_3, $hdt$_1, $hdt$_4]] in Work 'Reducer 17' is a cross product PREHOOK: query: explain select count(distinct cs_order_number) as `order count` @@ -70,7 +69,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 10 + Map 9 Map Operator Tree: TableScan alias: call_center @@ -93,7 +92,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 8 + Map 7 Map Operator Tree: TableScan alias: date_dim @@ -116,16 +115,11 @@ STAGE PLANS: Spark Edges: Reducer 12 <- Map 11 (GROUP, 24) - Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 1009), Reducer 18 (PARTITION-LEVEL SORT, 1009) - Reducer 15 <- Reducer 14 (GROUP, 1009) - Reducer 17 <- Map 16 (PARTITION-LEVEL SORT, 1), Map 19 (PARTITION-LEVEL SORT, 1), Map 20 (PARTITION-LEVEL SORT, 1), Map 21 (PARTITION-LEVEL SORT, 1) - Reducer 18 <- Reducer 17 (GROUP, 1009) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 494), Map 9 (PARTITION-LEVEL SORT, 494) - Reducer 3 <- Reducer 12 (PARTITION-LEVEL SORT, 418), Reducer 2 (PARTITION-LEVEL SORT, 418) - Reducer 4 <- Reducer 15 (PARTITION-LEVEL SORT, 1009), Reducer 3 (PARTITION-LEVEL SORT, 1009) - Reducer 5 <- Reducer 4 (GROUP, 1009) - Reducer 6 <- Reducer 5 (GROUP, 1) - Reducer 7 <- Reducer 6 (SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 494), Map 8 (PARTITION-LEVEL SORT, 494) + Reducer 3 <- Map 10 (PARTITION-LEVEL SORT, 723), Reducer 12 (PARTITION-LEVEL SORT, 723), Reducer 2 (PARTITION-LEVEL SORT, 723) + Reducer 4 <- Reducer 3 (GROUP, 447) + Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 6 <- Reducer 5 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -134,7 +128,7 @@ STAGE PLANS: alias: cs1 Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (cs_call_center_sk is not null and cs_ship_addr_sk is not null and cs_ship_date_sk is not null) (type: boolean) + predicate: (cs_call_center_sk is not null and cs_order_number is not null and cs_ship_addr_sk is not null and cs_ship_date_sk is not null) (type: boolean) Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cs_ship_date_sk (type: int), cs_ship_addr_sk (type: int), cs_call_center_sk (type: int), cs_warehouse_sk (type: int), cs_order_number (type: int), cs_ext_ship_cost (type: decimal(7,2)), cs_net_profit (type: decimal(7,2)) @@ -148,7 +142,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6 input vertices: - 1 Map 8 + 1 Map 7 Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) @@ -158,6 +152,29 @@ STAGE PLANS: value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) Local Work: Map Reduce Local Work + Map 10 + Map Operator Tree: + TableScan + alias: cs2 + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cs_order_number is not null and cs_warehouse_sk is not null) (type: boolean) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cs_order_number (type: int), cs_warehouse_sk (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Map 11 Map Operator Tree: TableScan @@ -176,65 +193,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE - Map 13 - Map Operator Tree: - TableScan - alias: cs2 - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cs_warehouse_sk (type: int), cs_order_number (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - Map 16 - Map Operator Tree: - TableScan - alias: date_dim - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 73049 Data size: 292196 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 73049 Data size: 292196 Basic stats: COMPLETE Column stats: COMPLETE - Map 19 - Map Operator Tree: - TableScan - alias: call_center - Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 60 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 60 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE - Map 20 - Map Operator Tree: - TableScan - alias: customer_address - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 40000000 Data size: 160000000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 40000000 Data size: 160000000 Basic stats: COMPLETE Column stats: COMPLETE - Map 21 - Map Operator Tree: - TableScan - alias: cs1 - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: cs_warehouse_sk (type: int), cs_order_number (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int) - Map 9 + Map 8 Map Operator Tree: TableScan alias: customer_address @@ -268,86 +227,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 14399440 Data size: 1528617286 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean) - Reducer 14 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col0, _col2, _col3 - Statistics: Num rows: 5072854730221289472 Data size: 5072854730221289472 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col2 <> _col0) (type: boolean) - Statistics: Num rows: 5072854730221289472 Data size: 5072854730221289472 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col3 (type: int) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 5072854730221289472 Data size: 5072854730221289472 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col2 (type: int), _col3 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 5072854730221289472 Data size: 5072854730221289472 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 5072854730221289472 Data size: 5072854730221289472 Basic stats: COMPLETE Column stats: NONE - Reducer 15 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2536427365110644736 Data size: 2536427365110644736 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2536427365110644736 Data size: 2536427365110644736 Basic stats: COMPLETE Column stats: NONE - Reducer 17 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - Inner Join 0 to 3 - keys: - 0 - 1 - 2 - 3 - outputColumnNames: _col3, _col4 - Statistics: Num rows: 9223372036854775807 Data size: 9223372036854775807 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col4 (type: int), _col3 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 9223372036854775807 Data size: 9223372036854775807 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 9223372036854775807 Data size: 9223372036854775807 Basic stats: COMPLETE Column stats: NONE - Reducer 18 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4611686018427387903 Data size: 4611686018427387904 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4611686018427387903 Data size: 4611686018427387904 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 4611686018427387903 Data size: 4611686018427387904 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) Reducer 2 Local Work: Map Reduce Local Work @@ -368,7 +247,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col3, _col4, _col5, _col6 input vertices: - 1 Map 10 + 1 Map 9 Statistics: Num rows: 383314495 Data size: 51908482889 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col4 (type: int) @@ -380,59 +259,46 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Left Outer Join 0 to 1 + Left Semi Join 0 to 1 + Left Outer Join 0 to 2 keys: 0 _col4 (type: int) 1 _col0 (type: int) - outputColumnNames: _col3, _col4, _col5, _col6, _col14 - Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: int), _col4 (type: int) - sort order: ++ - Map-reduce partition columns: _col3 (type: int), _col4 (type: int) - Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col14 (type: boolean) - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col3 (type: int), _col4 (type: int) - 1 _col0 (type: int), _col1 (type: int) - outputColumnNames: _col4, _col5, _col6, _col14 - Statistics: Num rows: 2790070162094850048 Data size: 2790070162094850048 Basic stats: COMPLETE Column stats: NONE + 2 _col0 (type: int) + outputColumnNames: _col3, _col4, _col5, _col6, _col14, _col16 + residual filter predicates: {(_col3 <> _col14)} + Statistics: Num rows: 843291907 Data size: 114198664830 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col14 (type: boolean) + expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col16 (type: boolean) outputColumnNames: _col4, _col5, _col6, _col16 - Statistics: Num rows: 2790070162094850048 Data size: 2790070162094850048 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 843291907 Data size: 114198664830 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col16 is null (type: boolean) - Statistics: Num rows: 1395035081047425024 Data size: 1395035081047425024 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 421645953 Data size: 57099332347 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) outputColumnNames: _col4, _col5, _col6 - Statistics: Num rows: 1395035081047425024 Data size: 1395035081047425024 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 421645953 Data size: 57099332347 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col5), sum(_col6) keys: _col4 (type: int) mode: hash outputColumnNames: _col0, _col2, _col3 - Statistics: Num rows: 1395035081047425024 Data size: 1395035081047425024 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 421645953 Data size: 57099332347 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1395035081047425024 Data size: 1395035081047425024 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 421645953 Data size: 57099332347 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)) - Reducer 5 + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) keys: KEY._col0 (type: int) mode: partial2 outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1395035081047425024 Data size: 1395035081047425024 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 421645953 Data size: 57099332347 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col0), sum(_col1), sum(_col2) mode: partial2 @@ -442,7 +308,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)) - Reducer 6 + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2) @@ -459,7 +325,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)) - Reducer 7 + Reducer 6 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: decimal(17,2)), VALUE._col1 (type: decimal(17,2)) diff --git a/ql/src/test/results/clientpositive/perf/spark/query35.q.out b/ql/src/test/results/clientpositive/perf/spark/query35.q.out index 8759b71b8c..703131566a 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query35.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query35.q.out @@ -118,18 +118,16 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 154), Map 13 (PARTITION-LEVEL SORT, 154) - Reducer 12 <- Reducer 11 (GROUP, 169) - Reducer 15 <- Map 14 (PARTITION-LEVEL SORT, 398), Map 17 (PARTITION-LEVEL SORT, 398) - Reducer 16 <- Reducer 15 (GROUP, 437) - Reducer 19 <- Map 18 (PARTITION-LEVEL SORT, 305), Map 21 (PARTITION-LEVEL SORT, 305) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 855), Map 8 (PARTITION-LEVEL SORT, 855) - Reducer 20 <- Reducer 19 (GROUP, 336) - Reducer 3 <- Map 9 (PARTITION-LEVEL SORT, 597), Reducer 2 (PARTITION-LEVEL SORT, 597) - Reducer 4 <- Reducer 12 (PARTITION-LEVEL SORT, 953), Reducer 16 (PARTITION-LEVEL SORT, 953), Reducer 3 (PARTITION-LEVEL SORT, 953) - Reducer 5 <- Reducer 20 (PARTITION-LEVEL SORT, 648), Reducer 4 (PARTITION-LEVEL SORT, 648) - Reducer 6 <- Reducer 5 (GROUP, 529) - Reducer 7 <- Reducer 6 (SORT, 1) + Reducer 10 <- Map 11 (PARTITION-LEVEL SORT, 398), Map 9 (PARTITION-LEVEL SORT, 398) + Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 154), Map 15 (PARTITION-LEVEL SORT, 154) + Reducer 14 <- Reducer 13 (GROUP, 169) + Reducer 17 <- Map 16 (PARTITION-LEVEL SORT, 305), Map 19 (PARTITION-LEVEL SORT, 305) + Reducer 18 <- Reducer 17 (GROUP, 336) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 855), Map 7 (PARTITION-LEVEL SORT, 855) + Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 597), Reducer 2 (PARTITION-LEVEL SORT, 597) + Reducer 4 <- Reducer 10 (PARTITION-LEVEL SORT, 1009), Reducer 14 (PARTITION-LEVEL SORT, 1009), Reducer 18 (PARTITION-LEVEL SORT, 1009), Reducer 3 (PARTITION-LEVEL SORT, 1009) + Reducer 5 <- Reducer 4 (GROUP, 1009) + Reducer 6 <- Reducer 5 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -138,7 +136,7 @@ STAGE PLANS: alias: c Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (c_current_addr_sk is not null and c_current_cdemo_sk is not null) (type: boolean) + predicate: (c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_customer_sk is not null) (type: boolean) Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c_customer_sk (type: int), c_current_cdemo_sk (type: int), c_current_addr_sk (type: int) @@ -150,25 +148,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int) - Map 10 - Map Operator Tree: - TableScan - alias: web_sales - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) - Map 13 + Map 11 Map Operator Tree: TableScan alias: date_dim @@ -185,25 +165,25 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 12174 Data size: 13622706 Basic stats: COMPLETE Column stats: NONE - Map 14 + Map 12 Map Operator Tree: TableScan - alias: store_sales - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + alias: web_sales + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int) + expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) - Map 17 + Map 15 Map Operator Tree: TableScan alias: date_dim @@ -220,7 +200,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 12174 Data size: 13622706 Basic stats: COMPLETE Column stats: NONE - Map 18 + Map 16 Map Operator Tree: TableScan alias: catalog_sales @@ -238,7 +218,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) - Map 21 + Map 19 Map Operator Tree: TableScan alias: date_dim @@ -255,7 +235,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 12174 Data size: 13622706 Basic stats: COMPLETE Column stats: NONE - Map 8 + Map 7 Map Operator Tree: TableScan alias: ca @@ -273,7 +253,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 9 + Map 8 Map Operator Tree: TableScan alias: customer_demographics @@ -291,7 +271,49 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: int) - Reducer 11 + Map 9 + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reducer 10 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reducer 13 Reduce Operator Tree: Join Operator condition map: @@ -311,7 +333,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - Reducer 12 + Reducer 14 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) @@ -328,7 +350,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean) - Reducer 15 + Reducer 17 Reduce Operator Tree: Join Operator condition map: @@ -337,49 +359,34 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col1 (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE - Reducer 16 + Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Reducer 18 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE - Reducer 19 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), true (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: boolean) Reducer 2 Reduce Operator Tree: Join Operator @@ -396,23 +403,6 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 88000001 Data size: 75681779077 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col4 (type: string) - Reducer 20 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), true (type: boolean) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: boolean) Reducer 3 Reduce Operator Tree: Join Operator @@ -433,78 +423,60 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Left Outer Join 0 to 1 - Inner Join 0 to 2 + Left Semi Join 0 to 1 + Left Outer Join 0 to 2 + Left Outer Join 0 to 3 keys: 0 _col0 (type: int) 1 _col0 (type: int) 2 _col0 (type: int) - outputColumnNames: _col0, _col4, _col6, _col7, _col8, _col9, _col10, _col12 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col4 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: int), _col9 (type: int), _col10 (type: int), _col12 (type: boolean) - outputColumnNames: _col0, _col4, _col6, _col7, _col8, _col9, _col10, _col13 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: int), _col9 (type: int), _col10 (type: int), _col13 (type: boolean) - Reducer 5 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col4, _col6, _col7, _col8, _col9, _col10, _col13, _col15 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + 3 _col0 (type: int) + outputColumnNames: _col4, _col6, _col7, _col8, _col9, _col10, _col12, _col14 + Statistics: Num rows: 2090864244 Data size: 184456650574 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col13 is not null or _col15 is not null) (type: boolean) - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + predicate: (_col12 is not null or _col14 is not null) (type: boolean) + Statistics: Num rows: 2090864244 Data size: 184456650574 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col4 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: int), _col9 (type: int), _col10 (type: int) outputColumnNames: _col4, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2090864244 Data size: 184456650574 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(), avg(_col8), max(_col8), sum(_col8), avg(_col9), max(_col9), sum(_col9), avg(_col10), max(_col10), sum(_col10) keys: _col4 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: int), _col9 (type: int), _col10 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2090864244 Data size: 184456650574 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: int) sort order: ++++++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: int) - Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2090864244 Data size: 184456650574 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col6 (type: bigint), _col7 (type: struct), _col8 (type: int), _col9 (type: bigint), _col10 (type: struct), _col11 (type: int), _col12 (type: bigint), _col13 (type: struct), _col14 (type: int), _col15 (type: bigint) - Reducer 6 + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), avg(VALUE._col1), max(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), max(VALUE._col5), sum(VALUE._col6), avg(VALUE._col7), max(VALUE._col8), sum(VALUE._col9) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: int), KEY._col4 (type: int), KEY._col5 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 - Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1045432122 Data size: 92228325287 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col6 (type: bigint), _col7 (type: double), _col8 (type: int), _col9 (type: bigint), _col4 (type: int), _col10 (type: double), _col11 (type: int), _col12 (type: bigint), _col5 (type: int), _col13 (type: double), _col14 (type: int), _col15 (type: bigint), _col3 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col9, _col10, _col11, _col12, _col14, _col15, _col16, _col17 - Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1045432122 Data size: 92228325287 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col17 (type: int), _col7 (type: int), _col12 (type: int) sort order: ++++++ - Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1045432122 Data size: 92228325287 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: int), _col6 (type: bigint), _col9 (type: double), _col10 (type: int), _col11 (type: bigint), _col14 (type: double), _col15 (type: int), _col16 (type: bigint) - Reducer 7 + Reducer 6 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: double), VALUE._col2 (type: int), VALUE._col3 (type: bigint), KEY.reducesinkkey4 (type: int), VALUE._col0 (type: bigint), VALUE._col4 (type: double), VALUE._col5 (type: int), VALUE._col6 (type: bigint), KEY.reducesinkkey5 (type: int), VALUE._col0 (type: bigint), VALUE._col7 (type: double), VALUE._col8 (type: int), VALUE._col9 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 - Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1045432122 Data size: 92228325287 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/perf/spark/query69.q.out b/ql/src/test/results/clientpositive/perf/spark/query69.q.out index e4430beaac..efb3f301d6 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query69.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query69.q.out @@ -102,7 +102,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 12 + Map 11 Map Operator Tree: TableScan alias: date_dim @@ -125,7 +125,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 15 + Map 14 Map Operator Tree: TableScan alias: date_dim @@ -148,7 +148,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 18 + Map 17 Map Operator Tree: TableScan alias: date_dim @@ -170,14 +170,13 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 11 <- Map 10 (GROUP, 169) - Reducer 14 <- Map 13 (GROUP, 437) - Reducer 17 <- Map 16 (GROUP, 336) + Reducer 13 <- Map 12 (GROUP, 169) + Reducer 16 <- Map 15 (GROUP, 336) Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 697), Map 8 (PARTITION-LEVEL SORT, 697) Reducer 3 <- Map 9 (PARTITION-LEVEL SORT, 597), Reducer 2 (PARTITION-LEVEL SORT, 597) - Reducer 4 <- Reducer 11 (PARTITION-LEVEL SORT, 953), Reducer 14 (PARTITION-LEVEL SORT, 953), Reducer 3 (PARTITION-LEVEL SORT, 953) - Reducer 5 <- Reducer 17 (PARTITION-LEVEL SORT, 408), Reducer 4 (PARTITION-LEVEL SORT, 408) - Reducer 6 <- Reducer 5 (GROUP, 133) + Reducer 4 <- Map 10 (PARTITION-LEVEL SORT, 1009), Reducer 13 (PARTITION-LEVEL SORT, 1009), Reducer 3 (PARTITION-LEVEL SORT, 1009) + Reducer 5 <- Reducer 16 (PARTITION-LEVEL SORT, 648), Reducer 4 (PARTITION-LEVEL SORT, 648) + Reducer 6 <- Reducer 5 (GROUP, 265) Reducer 7 <- Reducer 6 (SORT, 1) #### A masked pattern was here #### Vertices: @@ -187,7 +186,7 @@ STAGE PLANS: alias: c Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (c_current_addr_sk is not null and c_current_cdemo_sk is not null) (type: boolean) + predicate: (c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_customer_sk is not null) (type: boolean) Statistics: Num rows: 80000000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c_customer_sk (type: int), c_current_cdemo_sk (type: int), c_current_addr_sk (type: int) @@ -202,15 +201,15 @@ STAGE PLANS: Map 10 Map Operator Tree: TableScan - alias: web_sales - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + alias: store_sales + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int) + expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -219,32 +218,36 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1 input vertices: - 1 Map 12 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: int) - mode: hash + 1 Map 11 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Map 13 + Map 12 Map Operator Tree: TableScan - alias: store_sales - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + alias: web_sales + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int) + expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -253,21 +256,21 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1 input vertices: - 1 Map 15 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + 1 Map 14 + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col1 (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 633595212 Data size: 55895953508 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work - Map 16 + Map 15 Map Operator Tree: TableScan alias: catalog_sales @@ -287,7 +290,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1 input vertices: - 1 Map 18 + 1 Map 17 Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col1 (type: int) @@ -336,7 +339,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1861800 Data size: 717186159 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: string) - Reducer 11 + Reducer 13 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) @@ -353,19 +356,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean) - Reducer 14 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE - Reducer 17 + Reducer 16 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) @@ -418,31 +409,27 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Left Outer Join 0 to 1 - Inner Join 0 to 2 + Left Semi Join 0 to 1 + Left Outer Join 0 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) 2 _col0 (type: int) outputColumnNames: _col0, _col6, _col7, _col8, _col9, _col10, _col12 - Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col12 (type: boolean) - outputColumnNames: _col0, _col6, _col7, _col8, _col9, _col10, _col13 + Statistics: Num rows: 1393909496 Data size: 122971100382 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col12 is null (type: boolean) Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col13 is null (type: boolean) - Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: string) - outputColumnNames: _col0, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 348477374 Data size: 30742775095 Basic stats: COMPLETE Column stats: NONE - value expressions: _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: string) + Select Operator + expressions: _col0 (type: int), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: string) + outputColumnNames: _col0, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 696954748 Data size: 61485550191 Basic stats: COMPLETE Column stats: NONE + value expressions: _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: string) Reducer 5 Reduce Operator Tree: Join Operator @@ -451,26 +438,26 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col6, _col7, _col8, _col9, _col10, _col15 - Statistics: Num rows: 383325119 Data size: 33817053337 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col6, _col7, _col8, _col9, _col10, _col14 + Statistics: Num rows: 766650239 Data size: 67634106676 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: _col15 is null (type: boolean) - Statistics: Num rows: 191662559 Data size: 16908526624 Basic stats: COMPLETE Column stats: NONE + predicate: _col14 is null (type: boolean) + Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: string) outputColumnNames: _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 191662559 Data size: 16908526624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 191662559 Data size: 16908526624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: string) sort order: +++++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: string) - Statistics: Num rows: 191662559 Data size: 16908526624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 383325119 Data size: 33817053293 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col5 (type: bigint) Reducer 6 @@ -480,15 +467,15 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: int), KEY._col4 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 95831279 Data size: 8454263267 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: bigint), _col3 (type: int), _col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col6 - Statistics: Num rows: 95831279 Data size: 8454263267 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: int), _col6 (type: string) sort order: +++++ - Statistics: Num rows: 95831279 Data size: 8454263267 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: bigint) Reducer 7 @@ -496,7 +483,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col0 (type: bigint), KEY.reducesinkkey3 (type: int), VALUE._col0 (type: bigint), KEY.reducesinkkey4 (type: string), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 95831279 Data size: 8454263267 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 191662559 Data size: 16908526602 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 8800 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/perf/spark/query94.q.out b/ql/src/test/results/clientpositive/perf/spark/query94.q.out index 43b8c77bdc..995875aa84 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query94.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query94.q.out @@ -1,4 +1,3 @@ -Warning: Shuffle Join JOIN[33][tables = [$hdt$_2, $hdt$_3, $hdt$_1, $hdt$_4]] in Work 'Reducer 17' is a cross product PREHOOK: query: explain select count(distinct ws_order_number) as `order count` @@ -66,7 +65,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 10 + Map 9 Map Operator Tree: TableScan alias: web_site @@ -89,7 +88,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 8 + Map 7 Map Operator Tree: TableScan alias: date_dim @@ -112,16 +111,11 @@ STAGE PLANS: Spark Edges: Reducer 12 <- Map 11 (GROUP, 11) - Reducer 14 <- Map 13 (PARTITION-LEVEL SORT, 1009), Reducer 18 (PARTITION-LEVEL SORT, 1009) - Reducer 15 <- Reducer 14 (GROUP, 1009) - Reducer 17 <- Map 16 (PARTITION-LEVEL SORT, 1), Map 19 (PARTITION-LEVEL SORT, 1), Map 20 (PARTITION-LEVEL SORT, 1), Map 21 (PARTITION-LEVEL SORT, 1) - Reducer 18 <- Reducer 17 (GROUP, 1009) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 327), Map 9 (PARTITION-LEVEL SORT, 327) - Reducer 3 <- Reducer 12 (PARTITION-LEVEL SORT, 209), Reducer 2 (PARTITION-LEVEL SORT, 209) - Reducer 4 <- Reducer 15 (PARTITION-LEVEL SORT, 1009), Reducer 3 (PARTITION-LEVEL SORT, 1009) - Reducer 5 <- Reducer 4 (GROUP, 1009) - Reducer 6 <- Reducer 5 (GROUP, 1) - Reducer 7 <- Reducer 6 (SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 327), Map 8 (PARTITION-LEVEL SORT, 327) + Reducer 3 <- Map 10 (PARTITION-LEVEL SORT, 362), Reducer 12 (PARTITION-LEVEL SORT, 362), Reducer 2 (PARTITION-LEVEL SORT, 362) + Reducer 4 <- Reducer 3 (GROUP, 224) + Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 6 <- Reducer 5 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -130,7 +124,7 @@ STAGE PLANS: alias: ws1 Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ws_ship_addr_sk is not null and ws_ship_date_sk is not null and ws_web_site_sk is not null) (type: boolean) + predicate: (ws_order_number is not null and ws_ship_addr_sk is not null and ws_ship_date_sk is not null and ws_web_site_sk is not null) (type: boolean) Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ws_ship_date_sk (type: int), ws_ship_addr_sk (type: int), ws_web_site_sk (type: int), ws_warehouse_sk (type: int), ws_order_number (type: int), ws_ext_ship_cost (type: decimal(7,2)), ws_net_profit (type: decimal(7,2)) @@ -144,7 +138,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6 input vertices: - 1 Map 8 + 1 Map 7 Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) @@ -154,6 +148,29 @@ STAGE PLANS: value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) Local Work: Map Reduce Local Work + Map 10 + Map Operator Tree: + TableScan + alias: ws2 + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (ws_order_number is not null and ws_warehouse_sk is not null) (type: boolean) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ws_order_number (type: int), ws_warehouse_sk (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Map 11 Map Operator Tree: TableScan @@ -172,65 +189,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 14398467 Data size: 1325194184 Basic stats: COMPLETE Column stats: NONE - Map 13 - Map Operator Tree: - TableScan - alias: ws2 - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ws_warehouse_sk (type: int), ws_order_number (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - Map 16 - Map Operator Tree: - TableScan - alias: date_dim - Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 73049 Data size: 292196 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 73049 Data size: 292196 Basic stats: COMPLETE Column stats: COMPLETE - Map 19 - Map Operator Tree: - TableScan - alias: web_site - Statistics: Num rows: 84 Data size: 155408 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 84 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 84 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE - Map 20 - Map Operator Tree: - TableScan - alias: customer_address - Statistics: Num rows: 40000000 Data size: 40595195284 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 40000000 Data size: 160000000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 40000000 Data size: 160000000 Basic stats: COMPLETE Column stats: COMPLETE - Map 21 - Map Operator Tree: - TableScan - alias: ws1 - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ws_warehouse_sk (type: int), ws_order_number (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int) - Map 9 + Map 8 Map Operator Tree: TableScan alias: customer_address @@ -264,86 +223,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 7199233 Data size: 662597045 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean) - Reducer 14 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col0, _col2, _col3 - Statistics: Num rows: 5072854730221289472 Data size: 5072854730221289472 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col2 <> _col0) (type: boolean) - Statistics: Num rows: 5072854730221289472 Data size: 5072854730221289472 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col3 (type: int) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 5072854730221289472 Data size: 5072854730221289472 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col2 (type: int), _col3 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 5072854730221289472 Data size: 5072854730221289472 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 5072854730221289472 Data size: 5072854730221289472 Basic stats: COMPLETE Column stats: NONE - Reducer 15 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2536427365110644736 Data size: 2536427365110644736 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2536427365110644736 Data size: 2536427365110644736 Basic stats: COMPLETE Column stats: NONE - Reducer 17 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - Inner Join 0 to 3 - keys: - 0 - 1 - 2 - 3 - outputColumnNames: _col3, _col4 - Statistics: Num rows: 9223372036854775807 Data size: 9223372036854775807 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col4 (type: int), _col3 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 9223372036854775807 Data size: 9223372036854775807 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 9223372036854775807 Data size: 9223372036854775807 Basic stats: COMPLETE Column stats: NONE - Reducer 18 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4611686018427387903 Data size: 4611686018427387904 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4611686018427387903 Data size: 4611686018427387904 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 4611686018427387903 Data size: 4611686018427387904 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) Reducer 2 Local Work: Map Reduce Local Work @@ -364,7 +243,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col3, _col4, _col5, _col6 input vertices: - 1 Map 10 + 1 Map 9 Statistics: Num rows: 191667562 Data size: 26061245514 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col4 (type: int) @@ -376,59 +255,46 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Left Outer Join 0 to 1 + Left Semi Join 0 to 1 + Left Outer Join 0 to 2 keys: 0 _col4 (type: int) 1 _col0 (type: int) - outputColumnNames: _col3, _col4, _col5, _col6, _col14 - Statistics: Num rows: 210834322 Data size: 28667370686 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: int), _col4 (type: int) - sort order: ++ - Map-reduce partition columns: _col3 (type: int), _col4 (type: int) - Statistics: Num rows: 210834322 Data size: 28667370686 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col14 (type: boolean) - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col3 (type: int), _col4 (type: int) - 1 _col0 (type: int), _col1 (type: int) - outputColumnNames: _col4, _col5, _col6, _col14 - Statistics: Num rows: 2790070162094850048 Data size: 2790070162094850048 Basic stats: COMPLETE Column stats: NONE + 2 _col0 (type: int) + outputColumnNames: _col3, _col4, _col5, _col6, _col14, _col16 + residual filter predicates: {(_col3 <> _col14)} + Statistics: Num rows: 421668645 Data size: 57334741373 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col14 (type: boolean) + expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)), _col16 (type: boolean) outputColumnNames: _col4, _col5, _col6, _col16 - Statistics: Num rows: 2790070162094850048 Data size: 2790070162094850048 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 421668645 Data size: 57334741373 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col16 is null (type: boolean) - Statistics: Num rows: 1395035081047425024 Data size: 1395035081047425024 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 210834322 Data size: 28667370618 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col4 (type: int), _col5 (type: decimal(7,2)), _col6 (type: decimal(7,2)) outputColumnNames: _col4, _col5, _col6 - Statistics: Num rows: 1395035081047425024 Data size: 1395035081047425024 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 210834322 Data size: 28667370618 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col5), sum(_col6) keys: _col4 (type: int) mode: hash outputColumnNames: _col0, _col2, _col3 - Statistics: Num rows: 1395035081047425024 Data size: 1395035081047425024 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 210834322 Data size: 28667370618 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1395035081047425024 Data size: 1395035081047425024 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 210834322 Data size: 28667370618 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)) - Reducer 5 + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) keys: KEY._col0 (type: int) mode: partial2 outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1395035081047425024 Data size: 1395035081047425024 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 210834322 Data size: 28667370618 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col0), sum(_col1), sum(_col2) mode: partial2 @@ -438,7 +304,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)) - Reducer 6 + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2) @@ -455,7 +321,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)) - Reducer 7 + Reducer 6 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: decimal(17,2)), VALUE._col1 (type: decimal(17,2)) diff --git a/ql/src/test/results/clientpositive/perf/tez/query10.q.out b/ql/src/test/results/clientpositive/perf/tez/query10.q.out index cf3651b35b..457e55e59f 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query10.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query10.q.out @@ -117,172 +117,160 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE) -Reducer 12 <- Reducer 11 (SIMPLE_EDGE) -Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE) +Reducer 10 <- Map 11 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (SIMPLE_EDGE) +Reducer 14 <- Map 11 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) Reducer 15 <- Reducer 14 (SIMPLE_EDGE) -Reducer 17 <- Map 16 (SIMPLE_EDGE), Map 19 (SIMPLE_EDGE) -Reducer 18 <- Reducer 17 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 12 (ONE_TO_ONE_EDGE), Reducer 15 (ONE_TO_ONE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 18 (ONE_TO_ONE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 10 (SIMPLE_EDGE), Reducer 13 (ONE_TO_ONE_EDGE), Reducer 15 (ONE_TO_ONE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 7 - File Output Operator [FS_74] - Limit [LIM_73] (rows=100 width=88) + Reducer 6 + File Output Operator [FS_69] + Limit [LIM_68] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_72] (rows=383325119 width=88) + Select Operator [SEL_67] (rows=1045432122 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_71] - Select Operator [SEL_70] (rows=383325119 width=88) + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_66] + Select Operator [SEL_65] (rows=1045432122 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col6","_col8","_col10","_col12"] - Group By Operator [GBY_69] (rows=383325119 width=88) + Group By Operator [GBY_64] (rows=1045432122 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7 - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_68] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_63] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Group By Operator [GBY_67] (rows=766650239 width=88) + Group By Operator [GBY_62] (rows=2090864244 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["count()"],keys:_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Select Operator [SEL_66] (rows=766650239 width=88) + Select Operator [SEL_61] (rows=2090864244 width=88) Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - Filter Operator [FIL_65] (rows=766650239 width=88) - predicate:(_col16 is not null or _col18 is not null) - Merge Join Operator [MERGEJOIN_112] (rows=766650239 width=88) - Conds:RS_62._col0=RS_63._col0(Left Outer),Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col16","_col18"] - <-Reducer 18 [ONE_TO_ONE_EDGE] - FORWARD [RS_63] + Filter Operator [FIL_60] (rows=2090864244 width=88) + predicate:(_col15 is not null or _col17 is not null) + Merge Join Operator [MERGEJOIN_108] (rows=2090864244 width=88) + Conds:RS_55._col0=RS_56._col0(Left Semi),RS_55._col0=RS_57._col0(Left Outer),RS_55._col0=RS_58._col0(Left Outer),Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col15","_col17"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_56] PartitionCols:_col0 - Select Operator [SEL_61] (rows=158394413 width=135) + Group By Operator [GBY_54] (rows=633595212 width=88) + Output:["_col0"],keys:_col0 + Select Operator [SEL_18] (rows=633595212 width=88) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_105] (rows=633595212 width=88) + Conds:RS_15._col0=RS_16._col0(Inner),Output:["_col1"] + <-Map 11 [SIMPLE_EDGE] + SHUFFLE [RS_16] + PartitionCols:_col0 + Select Operator [SEL_14] (rows=4058 width=1119) + Output:["_col0"] + Filter Operator [FIL_98] (rows=4058 width=1119) + predicate:((d_year = 2002) and d_date_sk is not null and d_moy BETWEEN 4 AND 7) + TableScan [TS_12] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] + <-Map 9 [SIMPLE_EDGE] + SHUFFLE [RS_15] + PartitionCols:_col0 + Select Operator [SEL_11] (rows=575995635 width=88) + Output:["_col0","_col1"] + Filter Operator [FIL_97] (rows=575995635 width=88) + predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_9] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"] + <-Reducer 13 [ONE_TO_ONE_EDGE] + FORWARD [RS_57] + PartitionCols:_col0 + Select Operator [SEL_32] (rows=79201469 width=135) Output:["_col0","_col1"] - Group By Operator [GBY_60] (rows=158394413 width=135) + Group By Operator [GBY_31] (rows=79201469 width=135) Output:["_col0"],keys:KEY._col0 - <-Reducer 17 [SIMPLE_EDGE] - SHUFFLE [RS_59] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_30] PartitionCols:_col0 - Group By Operator [GBY_58] (rows=316788826 width=135) + Group By Operator [GBY_29] (rows=158402938 width=135) Output:["_col0"],keys:_col1 - Merge Join Operator [MERGEJOIN_110] (rows=316788826 width=135) - Conds:RS_54._col0=RS_55._col0(Inner),Output:["_col1"] + Merge Join Operator [MERGEJOIN_106] (rows=158402938 width=135) + Conds:RS_25._col0=RS_26._col0(Inner),Output:["_col1"] + <-Map 11 [SIMPLE_EDGE] + SHUFFLE [RS_26] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_14] <-Map 16 [SIMPLE_EDGE] - SHUFFLE [RS_55] + SHUFFLE [RS_25] PartitionCols:_col0 - Select Operator [SEL_28] (rows=4058 width=1119) - Output:["_col0"] - Filter Operator [FIL_103] (rows=4058 width=1119) - predicate:((d_year = 2002) and d_date_sk is not null and d_moy BETWEEN 4 AND 7) - TableScan [TS_26] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] - <-Map 19 [SIMPLE_EDGE] - SHUFFLE [RS_54] + Select Operator [SEL_21] (rows=144002668 width=135) + Output:["_col0","_col1"] + Filter Operator [FIL_99] (rows=144002668 width=135) + predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_19] (rows=144002668 width=135) + default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] + <-Reducer 15 [ONE_TO_ONE_EDGE] + FORWARD [RS_58] + PartitionCols:_col0 + Select Operator [SEL_46] (rows=158394413 width=135) + Output:["_col0","_col1"] + Group By Operator [GBY_45] (rows=158394413 width=135) + Output:["_col0"],keys:KEY._col0 + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_44] + PartitionCols:_col0 + Group By Operator [GBY_43] (rows=316788826 width=135) + Output:["_col0"],keys:_col1 + Merge Join Operator [MERGEJOIN_107] (rows=316788826 width=135) + Conds:RS_39._col0=RS_40._col0(Inner),Output:["_col1"] + <-Map 11 [SIMPLE_EDGE] + SHUFFLE [RS_40] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_14] + <-Map 17 [SIMPLE_EDGE] + SHUFFLE [RS_39] PartitionCols:_col0 - Select Operator [SEL_50] (rows=287989836 width=135) + Select Operator [SEL_35] (rows=287989836 width=135) Output:["_col0","_col1"] - Filter Operator [FIL_104] (rows=287989836 width=135) + Filter Operator [FIL_101] (rows=287989836 width=135) predicate:(cs_ship_customer_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_48] (rows=287989836 width=135) + TableScan [TS_33] (rows=287989836 width=135) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_ship_customer_sk"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_62] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_55] PartitionCols:_col0 - Select Operator [SEL_47] (rows=696954748 width=88) - Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col16"] - Merge Join Operator [MERGEJOIN_111] (rows=696954748 width=88) - Conds:RS_43._col0=RS_44._col0(Left Outer),RS_43._col0=RS_45._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col15"] - <-Reducer 12 [ONE_TO_ONE_EDGE] - FORWARD [RS_44] - PartitionCols:_col0 - Select Operator [SEL_22] (rows=79201469 width=135) - Output:["_col0","_col1"] - Group By Operator [GBY_21] (rows=79201469 width=135) - Output:["_col0"],keys:KEY._col0 - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_20] - PartitionCols:_col0 - Group By Operator [GBY_19] (rows=158402938 width=135) - Output:["_col0"],keys:_col1 - Merge Join Operator [MERGEJOIN_108] (rows=158402938 width=135) - Conds:RS_15._col0=RS_16._col0(Inner),Output:["_col1"] - <-Map 16 [SIMPLE_EDGE] - SHUFFLE [RS_16] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_28] - <-Map 10 [SIMPLE_EDGE] - SHUFFLE [RS_15] - PartitionCols:_col0 - Select Operator [SEL_11] (rows=144002668 width=135) - Output:["_col0","_col1"] - Filter Operator [FIL_100] (rows=144002668 width=135) - predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_9] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] - <-Reducer 15 [ONE_TO_ONE_EDGE] - FORWARD [RS_45] - PartitionCols:_col0 - Group By Operator [GBY_35] (rows=316797606 width=88) - Output:["_col0"],keys:KEY._col0 - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_34] - PartitionCols:_col0 - Group By Operator [GBY_33] (rows=633595212 width=88) - Output:["_col0"],keys:_col1 - Merge Join Operator [MERGEJOIN_109] (rows=633595212 width=88) - Conds:RS_29._col0=RS_30._col0(Inner),Output:["_col1"] - <-Map 16 [SIMPLE_EDGE] - SHUFFLE [RS_30] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_28] - <-Map 13 [SIMPLE_EDGE] - SHUFFLE [RS_29] - PartitionCols:_col0 - Select Operator [SEL_25] (rows=575995635 width=88) - Output:["_col0","_col1"] - Filter Operator [FIL_102] (rows=575995635 width=88) - predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_23] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_43] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_107] (rows=96800003 width=860) - Conds:RS_40._col1=RS_41._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - <-Map 9 [SIMPLE_EDGE] - SHUFFLE [RS_41] - PartitionCols:_col0 - Select Operator [SEL_8] (rows=1861800 width=385) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Filter Operator [FIL_99] (rows=1861800 width=385) - predicate:cd_demo_sk is not null - TableScan [TS_6] (rows=1861800 width=385) - default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status","cd_purchase_estimate","cd_credit_rating","cd_dep_count","cd_dep_employed_count","cd_dep_college_count"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_40] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_106] (rows=88000001 width=860) - Conds:RS_37._col2=RS_38._col0(Inner),Output:["_col0","_col1"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_37] - PartitionCols:_col2 - Select Operator [SEL_2] (rows=80000000 width=860) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_97] (rows=80000000 width=860) - predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null) - TableScan [TS_0] (rows=80000000 width=860) - default@customer,c,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"] - <-Map 8 [SIMPLE_EDGE] - SHUFFLE [RS_38] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=20000000 width=1014) - Output:["_col0"] - Filter Operator [FIL_98] (rows=20000000 width=1014) - predicate:((ca_county) IN ('Walker County', 'Richland County', 'Gaines County', 'Douglas County', 'Dona Ana County') and ca_address_sk is not null) - TableScan [TS_3] (rows=40000000 width=1014) - default@customer_address,ca,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_county"] + Merge Join Operator [MERGEJOIN_104] (rows=96800003 width=860) + Conds:RS_50._col1=RS_51._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + <-Map 8 [SIMPLE_EDGE] + SHUFFLE [RS_51] + PartitionCols:_col0 + Select Operator [SEL_8] (rows=1861800 width=385) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Filter Operator [FIL_96] (rows=1861800 width=385) + predicate:cd_demo_sk is not null + TableScan [TS_6] (rows=1861800 width=385) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status","cd_purchase_estimate","cd_credit_rating","cd_dep_count","cd_dep_employed_count","cd_dep_college_count"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_50] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_103] (rows=88000001 width=860) + Conds:RS_47._col2=RS_48._col0(Inner),Output:["_col0","_col1"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_47] + PartitionCols:_col2 + Select Operator [SEL_2] (rows=80000000 width=860) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_94] (rows=80000000 width=860) + predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_customer_sk is not null) + TableScan [TS_0] (rows=80000000 width=860) + default@customer,c,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"] + <-Map 7 [SIMPLE_EDGE] + SHUFFLE [RS_48] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=20000000 width=1014) + Output:["_col0"] + Filter Operator [FIL_95] (rows=20000000 width=1014) + predicate:((ca_county) IN ('Walker County', 'Richland County', 'Gaines County', 'Douglas County', 'Dona Ana County') and ca_address_sk is not null) + TableScan [TS_3] (rows=40000000 width=1014) + default@customer_address,ca,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_county"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query14.q.out b/ql/src/test/results/clientpositive/perf/tez/query14.q.out index b2a45f155a..dfdd53de00 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query14.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query14.q.out @@ -1,6 +1,6 @@ -Warning: Shuffle Join MERGEJOIN[890][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 5' is a cross product -Warning: Shuffle Join MERGEJOIN[891][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 12' is a cross product Warning: Shuffle Join MERGEJOIN[892][tables = [$hdt$_2, $hdt$_3, $hdt$_1]] in Stage 'Reducer 16' is a cross product +Warning: Shuffle Join MERGEJOIN[891][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 12' is a cross product +Warning: Shuffle Join MERGEJOIN[890][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 5' is a cross product PREHOOK: query: explain with cross_items as (select i_item_sk ss_item_sk diff --git a/ql/src/test/results/clientpositive/perf/tez/query16.q.out b/ql/src/test/results/clientpositive/perf/tez/query16.q.out index a7b710d6e1..8107a05b54 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query16.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query16.q.out @@ -1,4 +1,3 @@ -Warning: Shuffle Join MERGEJOIN[113][tables = [$hdt$_2, $hdt$_3, $hdt$_1, $hdt$_4]] in Stage 'Reducer 18' is a cross product PREHOOK: query: explain select count(distinct cs_order_number) as `order count` @@ -63,179 +62,125 @@ Plan optimized by CBO. Vertex dependency in root stage Reducer 14 <- Map 13 (SIMPLE_EDGE) -Reducer 16 <- Map 15 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) -Reducer 17 <- Reducer 16 (SIMPLE_EDGE) -Reducer 18 <- Map 15 (CUSTOM_SIMPLE_EDGE), Map 20 (CUSTOM_SIMPLE_EDGE), Map 21 (CUSTOM_SIMPLE_EDGE), Map 22 (CUSTOM_SIMPLE_EDGE) -Reducer 19 <- Reducer 18 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) -Reducer 3 <- Map 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 14 (ONE_TO_ONE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 17 (ONE_TO_ONE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 12 (SIMPLE_EDGE), Reducer 14 (ONE_TO_ONE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 9 - File Output Operator [FS_74] - Limit [LIM_72] (rows=1 width=344) + Reducer 8 + File Output Operator [FS_49] + Limit [LIM_47] (rows=1 width=344) Number of rows:100 - Select Operator [SEL_71] (rows=1 width=344) + Select Operator [SEL_46] (rows=1 width=344) Output:["_col0","_col1","_col2"] - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_70] - Select Operator [SEL_69] (rows=1 width=344) + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_45] + Select Operator [SEL_44] (rows=1 width=344) Output:["_col1","_col2","_col3"] - Group By Operator [GBY_112] (rows=1 width=344) + Group By Operator [GBY_77] (rows=1 width=344) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] - <-Reducer 7 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_111] - Group By Operator [GBY_110] (rows=1 width=344) + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_76] + Group By Operator [GBY_75] (rows=1 width=344) Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"] - Group By Operator [GBY_109] (rows=1395035081047425024 width=1) + Group By Operator [GBY_74] (rows=421645953 width=135) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_108] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_73] PartitionCols:_col0 - Group By Operator [GBY_107] (rows=1395035081047425024 width=1) + Group By Operator [GBY_72] (rows=421645953 width=135) Output:["_col0","_col2","_col3"],aggregations:["sum(_col5)","sum(_col6)"],keys:_col4 - Select Operator [SEL_65] (rows=1395035081047425024 width=1) + Select Operator [SEL_40] (rows=421645953 width=135) Output:["_col4","_col5","_col6"] - Filter Operator [FIL_64] (rows=1395035081047425024 width=1) + Filter Operator [FIL_39] (rows=421645953 width=135) predicate:_col16 is null - Select Operator [SEL_63] (rows=2790070162094850048 width=1) + Select Operator [SEL_38] (rows=843291907 width=135) Output:["_col4","_col5","_col6","_col16"] - Merge Join Operator [MERGEJOIN_119] (rows=2790070162094850048 width=1) - Conds:RS_60._col3, _col4=RS_61._col0, _col1(Inner),Output:["_col4","_col5","_col6","_col14"] - <-Reducer 17 [ONE_TO_ONE_EDGE] - FORWARD [RS_61] - PartitionCols:_col0, _col1 - Group By Operator [GBY_46] (rows=2536427365110644736 width=1) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_45] - PartitionCols:_col0, _col1 - Group By Operator [GBY_44] (rows=5072854730221289472 width=1) - Output:["_col0","_col1"],keys:_col2, _col3 - Select Operator [SEL_43] (rows=5072854730221289472 width=1) - Output:["_col2","_col3"] - Filter Operator [FIL_42] (rows=5072854730221289472 width=1) - predicate:(_col2 <> _col0) - Merge Join Operator [MERGEJOIN_117] (rows=5072854730221289472 width=1) - Conds:RS_39._col1=RS_40._col1(Inner),Output:["_col0","_col2","_col3"] - <-Map 15 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_39] - PartitionCols:_col1 - Select Operator [SEL_20] (rows=287989836 width=135) - Output:["_col0","_col1"] - TableScan [TS_19] (rows=287989836 width=135) - default@catalog_sales,cs2,Tbl:COMPLETE,Col:NONE,Output:["cs_warehouse_sk","cs_order_number"] - <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_40] - PartitionCols:_col1 - Select Operator [SEL_38] (rows=4611686018427387903 width=1) - Output:["_col0","_col1"] - Group By Operator [GBY_37] (rows=4611686018427387903 width=1) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Reducer 18 [SIMPLE_EDGE] - SHUFFLE [RS_36] - PartitionCols:_col0, _col1 - Group By Operator [GBY_35] (rows=9223372036854775807 width=1) - Output:["_col0","_col1"],keys:_col4, _col3 - Merge Join Operator [MERGEJOIN_113] (rows=9223372036854775807 width=1) - Conds:(Inner),(Inner),(Inner),Output:["_col3","_col4"] - <-Map 15 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_32] - Please refer to the previous Select Operator [SEL_20] - <-Map 20 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_29] - Select Operator [SEL_22] (rows=73049 width=4) - TableScan [TS_21] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE - <-Map 21 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_30] - Select Operator [SEL_24] (rows=60 width=4) - TableScan [TS_23] (rows=60 width=2045) - default@call_center,call_center,Tbl:COMPLETE,Col:COMPLETE - <-Map 22 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_31] - Select Operator [SEL_26] (rows=40000000 width=4) - TableScan [TS_25] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_60] - PartitionCols:_col3, _col4 - Merge Join Operator [MERGEJOIN_118] (rows=421645953 width=135) - Conds:RS_57._col4=RS_58._col0(Left Outer),Output:["_col3","_col4","_col5","_col6","_col14"] - <-Reducer 14 [ONE_TO_ONE_EDGE] - FORWARD [RS_58] + Merge Join Operator [MERGEJOIN_81] (rows=843291907 width=135) + Conds:RS_34._col4=RS_35._col0(Left Semi),RS_34._col4=RS_36._col0(Left Outer),Output:["_col3","_col4","_col5","_col6","_col14","_col16"],residual filter predicates:{(_col3 <> _col14)} + <-Map 12 [SIMPLE_EDGE] + SHUFFLE [RS_35] + PartitionCols:_col0 + Group By Operator [GBY_33] (rows=287989836 width=135) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_24] (rows=287989836 width=135) + Output:["_col0","_col1"] + Filter Operator [FIL_70] (rows=287989836 width=135) + predicate:(cs_order_number is not null and cs_warehouse_sk is not null) + TableScan [TS_22] (rows=287989836 width=135) + default@catalog_sales,cs2,Tbl:COMPLETE,Col:NONE,Output:["cs_warehouse_sk","cs_order_number"] + <-Reducer 14 [ONE_TO_ONE_EDGE] + FORWARD [RS_36] + PartitionCols:_col0 + Select Operator [SEL_31] (rows=14399440 width=106) + Output:["_col0","_col1"] + Group By Operator [GBY_30] (rows=14399440 width=106) + Output:["_col0"],keys:KEY._col0 + <-Map 13 [SIMPLE_EDGE] + SHUFFLE [RS_29] + PartitionCols:_col0 + Group By Operator [GBY_28] (rows=28798881 width=106) + Output:["_col0"],keys:cr_order_number + Filter Operator [FIL_71] (rows=28798881 width=106) + predicate:cr_order_number is not null + TableScan [TS_25] (rows=28798881 width=106) + default@catalog_returns,cr1,Tbl:COMPLETE,Col:NONE,Output:["cr_order_number"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_34] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_80] (rows=383314495 width=135) + Conds:RS_18._col2=RS_19._col0(Inner),Output:["_col3","_col4","_col5","_col6"] + <-Map 11 [SIMPLE_EDGE] + SHUFFLE [RS_19] PartitionCols:_col0 - Select Operator [SEL_18] (rows=14399440 width=106) - Output:["_col0","_col1"] - Group By Operator [GBY_17] (rows=14399440 width=106) - Output:["_col0"],keys:KEY._col0 - <-Map 13 [SIMPLE_EDGE] - SHUFFLE [RS_16] - PartitionCols:_col0 - Group By Operator [GBY_15] (rows=28798881 width=106) - Output:["_col0"],keys:cr_order_number - Filter Operator [FIL_104] (rows=28798881 width=106) - predicate:cr_order_number is not null - TableScan [TS_12] (rows=28798881 width=106) - default@catalog_returns,cr1,Tbl:COMPLETE,Col:NONE,Output:["cr_order_number"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_57] - PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_116] (rows=383314495 width=135) - Conds:RS_54._col2=RS_55._col0(Inner),Output:["_col3","_col4","_col5","_col6"] - <-Map 12 [SIMPLE_EDGE] - SHUFFLE [RS_55] + Select Operator [SEL_11] (rows=30 width=2045) + Output:["_col0"] + Filter Operator [FIL_69] (rows=30 width=2045) + predicate:((cc_county) IN ('Ziebach County', 'Levy County', 'Huron County', 'Franklin Parish', 'Daviess County') and cc_call_center_sk is not null) + TableScan [TS_9] (rows=60 width=2045) + default@call_center,call_center,Tbl:COMPLETE,Col:NONE,Output:["cc_call_center_sk","cc_county"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_79] (rows=348467716 width=135) + Conds:RS_15._col1=RS_16._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"] + <-Map 10 [SIMPLE_EDGE] + SHUFFLE [RS_16] PartitionCols:_col0 - Select Operator [SEL_11] (rows=30 width=2045) + Select Operator [SEL_8] (rows=20000000 width=1014) Output:["_col0"] - Filter Operator [FIL_103] (rows=30 width=2045) - predicate:((cc_county) IN ('Ziebach County', 'Levy County', 'Huron County', 'Franklin Parish', 'Daviess County') and cc_call_center_sk is not null) - TableScan [TS_9] (rows=60 width=2045) - default@call_center,call_center,Tbl:COMPLETE,Col:NONE,Output:["cc_call_center_sk","cc_county"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_54] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_115] (rows=348467716 width=135) - Conds:RS_51._col1=RS_52._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"] - <-Map 11 [SIMPLE_EDGE] - SHUFFLE [RS_52] + Filter Operator [FIL_68] (rows=20000000 width=1014) + predicate:((ca_state = 'NY') and ca_address_sk is not null) + TableScan [TS_6] (rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_15] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_78] (rows=316788826 width=135) + Conds:RS_12._col0=RS_13._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_12] PartitionCols:_col0 - Select Operator [SEL_8] (rows=20000000 width=1014) + Select Operator [SEL_2] (rows=287989836 width=135) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Filter Operator [FIL_66] (rows=287989836 width=135) + predicate:(cs_call_center_sk is not null and cs_order_number is not null and cs_ship_addr_sk is not null and cs_ship_date_sk is not null) + TableScan [TS_0] (rows=287989836 width=135) + default@catalog_sales,cs1,Tbl:COMPLETE,Col:NONE,Output:["cs_ship_date_sk","cs_ship_addr_sk","cs_call_center_sk","cs_warehouse_sk","cs_order_number","cs_ext_ship_cost","cs_net_profit"] + <-Map 9 [SIMPLE_EDGE] + SHUFFLE [RS_13] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=8116 width=1119) Output:["_col0"] - Filter Operator [FIL_102] (rows=20000000 width=1014) - predicate:((ca_state = 'NY') and ca_address_sk is not null) - TableScan [TS_6] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_51] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_114] (rows=316788826 width=135) - Conds:RS_48._col0=RS_49._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_48] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=287989836 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_100] (rows=287989836 width=135) - predicate:(cs_call_center_sk is not null and cs_ship_addr_sk is not null and cs_ship_date_sk is not null) - TableScan [TS_0] (rows=287989836 width=135) - default@catalog_sales,cs1,Tbl:COMPLETE,Col:NONE,Output:["cs_ship_date_sk","cs_ship_addr_sk","cs_call_center_sk","cs_warehouse_sk","cs_order_number","cs_ext_ship_cost","cs_net_profit"] - <-Map 10 [SIMPLE_EDGE] - SHUFFLE [RS_49] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=8116 width=1119) - Output:["_col0"] - Filter Operator [FIL_101] (rows=8116 width=1119) - predicate:(CAST( d_date AS TIMESTAMP) BETWEEN 2001-04-01 00:00:00.0 AND 2001-05-31 01:00:00.0 and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] + Filter Operator [FIL_67] (rows=8116 width=1119) + predicate:(CAST( d_date AS TIMESTAMP) BETWEEN 2001-04-01 00:00:00.0 AND 2001-05-31 01:00:00.0 and d_date_sk is not null) + TableScan [TS_3] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query23.q.out b/ql/src/test/results/clientpositive/perf/tez/query23.q.out index 7112de61d9..3507278931 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query23.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query23.q.out @@ -1,5 +1,5 @@ -Warning: Shuffle Join MERGEJOIN[367][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 25' is a cross product Warning: Shuffle Join MERGEJOIN[369][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 30' is a cross product +Warning: Shuffle Join MERGEJOIN[367][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 25' is a cross product PREHOOK: query: explain with frequent_ss_items as (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt diff --git a/ql/src/test/results/clientpositive/perf/tez/query35.q.out b/ql/src/test/results/clientpositive/perf/tez/query35.q.out index a72f57816e..decbadbdcd 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query35.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query35.q.out @@ -113,172 +113,160 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) -Reducer 12 <- Reducer 11 (SIMPLE_EDGE) -Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 18 (SIMPLE_EDGE) +Reducer 10 <- Map 11 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (SIMPLE_EDGE) +Reducer 14 <- Map 11 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) Reducer 15 <- Reducer 14 (SIMPLE_EDGE) -Reducer 16 <- Map 13 (SIMPLE_EDGE), Map 19 (SIMPLE_EDGE) -Reducer 17 <- Reducer 16 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 12 (ONE_TO_ONE_EDGE), Reducer 15 (ONE_TO_ONE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 17 (ONE_TO_ONE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 10 (SIMPLE_EDGE), Reducer 13 (ONE_TO_ONE_EDGE), Reducer 15 (ONE_TO_ONE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 7 - File Output Operator [FS_75] - Limit [LIM_73] (rows=100 width=88) + Reducer 6 + File Output Operator [FS_70] + Limit [LIM_68] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_72] (rows=383325119 width=88) + Select Operator [SEL_67] (rows=1045432122 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16"] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_71] - Select Operator [SEL_70] (rows=383325119 width=88) + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_66] + Select Operator [SEL_65] (rows=1045432122 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col12","_col14","_col15","_col16","_col17"] - Group By Operator [GBY_69] (rows=383325119 width=88) + Group By Operator [GBY_64] (rows=1045432122 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"],aggregations:["count(VALUE._col0)","avg(VALUE._col1)","max(VALUE._col2)","sum(VALUE._col3)","avg(VALUE._col4)","max(VALUE._col5)","sum(VALUE._col6)","avg(VALUE._col7)","max(VALUE._col8)","sum(VALUE._col9)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_68] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_63] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 - Group By Operator [GBY_67] (rows=766650239 width=88) + Group By Operator [GBY_62] (rows=2090864244 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"],aggregations:["count()","avg(_col8)","max(_col8)","sum(_col8)","avg(_col9)","max(_col9)","sum(_col9)","avg(_col10)","max(_col10)","sum(_col10)"],keys:_col4, _col6, _col7, _col8, _col9, _col10 - Select Operator [SEL_66] (rows=766650239 width=88) + Select Operator [SEL_61] (rows=2090864244 width=88) Output:["_col4","_col6","_col7","_col8","_col9","_col10"] - Filter Operator [FIL_65] (rows=766650239 width=88) - predicate:(_col13 is not null or _col15 is not null) - Merge Join Operator [MERGEJOIN_113] (rows=766650239 width=88) - Conds:RS_62._col0=RS_63._col0(Left Outer),Output:["_col4","_col6","_col7","_col8","_col9","_col10","_col13","_col15"] - <-Reducer 17 [ONE_TO_ONE_EDGE] - FORWARD [RS_63] + Filter Operator [FIL_60] (rows=2090864244 width=88) + predicate:(_col12 is not null or _col14 is not null) + Merge Join Operator [MERGEJOIN_109] (rows=2090864244 width=88) + Conds:RS_55._col0=RS_56._col0(Left Semi),RS_55._col0=RS_57._col0(Left Outer),RS_55._col0=RS_58._col0(Left Outer),Output:["_col4","_col6","_col7","_col8","_col9","_col10","_col12","_col14"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_56] PartitionCols:_col0 - Select Operator [SEL_61] (rows=158394413 width=135) + Group By Operator [GBY_54] (rows=633595212 width=88) + Output:["_col0"],keys:_col0 + Select Operator [SEL_18] (rows=633595212 width=88) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_106] (rows=633595212 width=88) + Conds:RS_15._col0=RS_16._col0(Inner),Output:["_col1"] + <-Map 11 [SIMPLE_EDGE] + SHUFFLE [RS_16] + PartitionCols:_col0 + Select Operator [SEL_14] (rows=12174 width=1119) + Output:["_col0"] + Filter Operator [FIL_99] (rows=12174 width=1119) + predicate:((d_qoy < 4) and (d_year = 1999) and d_date_sk is not null) + TableScan [TS_12] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_qoy"] + <-Map 9 [SIMPLE_EDGE] + SHUFFLE [RS_15] + PartitionCols:_col0 + Select Operator [SEL_11] (rows=575995635 width=88) + Output:["_col0","_col1"] + Filter Operator [FIL_98] (rows=575995635 width=88) + predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_9] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"] + <-Reducer 13 [ONE_TO_ONE_EDGE] + FORWARD [RS_57] + PartitionCols:_col0 + Select Operator [SEL_32] (rows=79201469 width=135) Output:["_col0","_col1"] - Group By Operator [GBY_60] (rows=158394413 width=135) + Group By Operator [GBY_31] (rows=79201469 width=135) Output:["_col0"],keys:KEY._col0 - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_59] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_30] PartitionCols:_col0 - Group By Operator [GBY_58] (rows=316788826 width=135) + Group By Operator [GBY_29] (rows=158402938 width=135) Output:["_col0"],keys:_col1 - Merge Join Operator [MERGEJOIN_111] (rows=316788826 width=135) - Conds:RS_54._col0=RS_55._col0(Inner),Output:["_col1"] - <-Map 13 [SIMPLE_EDGE] - SHUFFLE [RS_55] + Merge Join Operator [MERGEJOIN_107] (rows=158402938 width=135) + Conds:RS_25._col0=RS_26._col0(Inner),Output:["_col1"] + <-Map 11 [SIMPLE_EDGE] + SHUFFLE [RS_26] PartitionCols:_col0 - Select Operator [SEL_14] (rows=12174 width=1119) - Output:["_col0"] - Filter Operator [FIL_102] (rows=12174 width=1119) - predicate:((d_qoy < 4) and (d_year = 1999) and d_date_sk is not null) - TableScan [TS_12] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_qoy"] - <-Map 19 [SIMPLE_EDGE] - SHUFFLE [RS_54] + Please refer to the previous Select Operator [SEL_14] + <-Map 16 [SIMPLE_EDGE] + SHUFFLE [RS_25] PartitionCols:_col0 - Select Operator [SEL_50] (rows=287989836 width=135) + Select Operator [SEL_21] (rows=144002668 width=135) Output:["_col0","_col1"] - Filter Operator [FIL_105] (rows=287989836 width=135) - predicate:(cs_ship_customer_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_48] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_ship_customer_sk"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_62] + Filter Operator [FIL_100] (rows=144002668 width=135) + predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_19] (rows=144002668 width=135) + default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] + <-Reducer 15 [ONE_TO_ONE_EDGE] + FORWARD [RS_58] PartitionCols:_col0 - Select Operator [SEL_47] (rows=696954748 width=88) - Output:["_col0","_col4","_col6","_col7","_col8","_col9","_col10","_col13"] - Merge Join Operator [MERGEJOIN_112] (rows=696954748 width=88) - Conds:RS_43._col0=RS_44._col0(Left Outer),RS_43._col0=RS_45._col0(Inner),Output:["_col0","_col4","_col6","_col7","_col8","_col9","_col10","_col12"] - <-Reducer 12 [ONE_TO_ONE_EDGE] - FORWARD [RS_44] + Select Operator [SEL_46] (rows=158394413 width=135) + Output:["_col0","_col1"] + Group By Operator [GBY_45] (rows=158394413 width=135) + Output:["_col0"],keys:KEY._col0 + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_44] PartitionCols:_col0 - Select Operator [SEL_22] (rows=79201469 width=135) - Output:["_col0","_col1"] - Group By Operator [GBY_21] (rows=79201469 width=135) - Output:["_col0"],keys:KEY._col0 - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_20] + Group By Operator [GBY_43] (rows=316788826 width=135) + Output:["_col0"],keys:_col1 + Merge Join Operator [MERGEJOIN_108] (rows=316788826 width=135) + Conds:RS_39._col0=RS_40._col0(Inner),Output:["_col1"] + <-Map 11 [SIMPLE_EDGE] + SHUFFLE [RS_40] PartitionCols:_col0 - Group By Operator [GBY_19] (rows=158402938 width=135) - Output:["_col0"],keys:_col1 - Merge Join Operator [MERGEJOIN_109] (rows=158402938 width=135) - Conds:RS_15._col0=RS_16._col0(Inner),Output:["_col1"] - <-Map 13 [SIMPLE_EDGE] - SHUFFLE [RS_16] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_14] - <-Map 10 [SIMPLE_EDGE] - SHUFFLE [RS_15] - PartitionCols:_col0 - Select Operator [SEL_11] (rows=144002668 width=135) - Output:["_col0","_col1"] - Filter Operator [FIL_101] (rows=144002668 width=135) - predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_9] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] - <-Reducer 15 [ONE_TO_ONE_EDGE] - FORWARD [RS_45] - PartitionCols:_col0 - Group By Operator [GBY_35] (rows=316797606 width=88) - Output:["_col0"],keys:KEY._col0 - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_34] - PartitionCols:_col0 - Group By Operator [GBY_33] (rows=633595212 width=88) - Output:["_col0"],keys:_col1 - Merge Join Operator [MERGEJOIN_110] (rows=633595212 width=88) - Conds:RS_29._col0=RS_30._col0(Inner),Output:["_col1"] - <-Map 13 [SIMPLE_EDGE] - SHUFFLE [RS_30] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_14] - <-Map 18 [SIMPLE_EDGE] - SHUFFLE [RS_29] - PartitionCols:_col0 - Select Operator [SEL_25] (rows=575995635 width=88) - Output:["_col0","_col1"] - Filter Operator [FIL_103] (rows=575995635 width=88) - predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_23] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_43] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_108] (rows=96800003 width=860) - Conds:RS_40._col1=RS_41._col0(Inner),Output:["_col0","_col4","_col6","_col7","_col8","_col9","_col10"] - <-Map 9 [SIMPLE_EDGE] - SHUFFLE [RS_41] - PartitionCols:_col0 - Select Operator [SEL_8] (rows=1861800 width=385) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_100] (rows=1861800 width=385) - predicate:cd_demo_sk is not null - TableScan [TS_6] (rows=1861800 width=385) - default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_dep_count","cd_dep_employed_count","cd_dep_college_count"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_40] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_107] (rows=88000001 width=860) - Conds:RS_37._col2=RS_38._col0(Inner),Output:["_col0","_col1","_col4"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_37] - PartitionCols:_col2 - Select Operator [SEL_2] (rows=80000000 width=860) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_98] (rows=80000000 width=860) - predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null) - TableScan [TS_0] (rows=80000000 width=860) - default@customer,c,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"] - <-Map 8 [SIMPLE_EDGE] - SHUFFLE [RS_38] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=40000000 width=1014) - Output:["_col0","_col1"] - Filter Operator [FIL_99] (rows=40000000 width=1014) - predicate:ca_address_sk is not null - TableScan [TS_3] (rows=40000000 width=1014) - default@customer_address,ca,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] + Please refer to the previous Select Operator [SEL_14] + <-Map 17 [SIMPLE_EDGE] + SHUFFLE [RS_39] + PartitionCols:_col0 + Select Operator [SEL_35] (rows=287989836 width=135) + Output:["_col0","_col1"] + Filter Operator [FIL_102] (rows=287989836 width=135) + predicate:(cs_ship_customer_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_33] (rows=287989836 width=135) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_ship_customer_sk"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_55] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_105] (rows=96800003 width=860) + Conds:RS_50._col1=RS_51._col0(Inner),Output:["_col0","_col4","_col6","_col7","_col8","_col9","_col10"] + <-Map 8 [SIMPLE_EDGE] + SHUFFLE [RS_51] + PartitionCols:_col0 + Select Operator [SEL_8] (rows=1861800 width=385) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_97] (rows=1861800 width=385) + predicate:cd_demo_sk is not null + TableScan [TS_6] (rows=1861800 width=385) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_dep_count","cd_dep_employed_count","cd_dep_college_count"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_50] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_104] (rows=88000001 width=860) + Conds:RS_47._col2=RS_48._col0(Inner),Output:["_col0","_col1","_col4"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_47] + PartitionCols:_col2 + Select Operator [SEL_2] (rows=80000000 width=860) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_95] (rows=80000000 width=860) + predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_customer_sk is not null) + TableScan [TS_0] (rows=80000000 width=860) + default@customer,c,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"] + <-Map 7 [SIMPLE_EDGE] + SHUFFLE [RS_48] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=40000000 width=1014) + Output:["_col0","_col1"] + Filter Operator [FIL_96] (rows=40000000 width=1014) + predicate:ca_address_sk is not null + TableScan [TS_3] (rows=40000000 width=1014) + default@customer_address,ca,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query69.q.out b/ql/src/test/results/clientpositive/perf/tez/query69.q.out index 591f3fcdb0..08ea5a1f64 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query69.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query69.q.out @@ -93,16 +93,15 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 13 (SIMPLE_EDGE) -Reducer 12 <- Reducer 11 (SIMPLE_EDGE) -Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 18 (SIMPLE_EDGE) -Reducer 15 <- Reducer 14 (SIMPLE_EDGE) -Reducer 16 <- Map 13 (SIMPLE_EDGE), Map 19 (SIMPLE_EDGE) -Reducer 17 <- Reducer 16 (SIMPLE_EDGE) +Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) +Reducer 13 <- Map 12 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (SIMPLE_EDGE) +Reducer 15 <- Map 12 (SIMPLE_EDGE), Map 18 (SIMPLE_EDGE) +Reducer 16 <- Reducer 15 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Reducer 3 <- Map 9 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 12 (ONE_TO_ONE_EDGE), Reducer 15 (ONE_TO_ONE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 17 (ONE_TO_ONE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 4 <- Reducer 11 (SIMPLE_EDGE), Reducer 14 (ONE_TO_ONE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 16 (ONE_TO_ONE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) @@ -111,158 +110,153 @@ Stage-0 limit:100 Stage-1 Reducer 7 - File Output Operator [FS_76] - Limit [LIM_75] (rows=100 width=88) + File Output Operator [FS_73] + Limit [LIM_72] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_74] (rows=95831279 width=88) + Select Operator [SEL_71] (rows=191662559 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_73] - Select Operator [SEL_72] (rows=95831279 width=88) + SHUFFLE [RS_70] + Select Operator [SEL_69] (rows=191662559 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col6"] - Group By Operator [GBY_71] (rows=95831279 width=88) + Group By Operator [GBY_68] (rows=191662559 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_70] + SHUFFLE [RS_67] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_69] (rows=191662559 width=88) + Group By Operator [GBY_66] (rows=383325119 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count()"],keys:_col6, _col7, _col8, _col9, _col10 - Select Operator [SEL_68] (rows=191662559 width=88) + Select Operator [SEL_65] (rows=383325119 width=88) Output:["_col6","_col7","_col8","_col9","_col10"] - Filter Operator [FIL_67] (rows=191662559 width=88) - predicate:_col15 is null - Merge Join Operator [MERGEJOIN_114] (rows=383325119 width=88) - Conds:RS_64._col0=RS_65._col0(Left Outer),Output:["_col6","_col7","_col8","_col9","_col10","_col15"] - <-Reducer 17 [ONE_TO_ONE_EDGE] - FORWARD [RS_65] + Filter Operator [FIL_64] (rows=383325119 width=88) + predicate:_col14 is null + Merge Join Operator [MERGEJOIN_111] (rows=766650239 width=88) + Conds:RS_61._col0=RS_62._col0(Left Outer),Output:["_col6","_col7","_col8","_col9","_col10","_col14"] + <-Reducer 16 [ONE_TO_ONE_EDGE] + FORWARD [RS_62] PartitionCols:_col0 - Select Operator [SEL_63] (rows=158394413 width=135) + Select Operator [SEL_60] (rows=158394413 width=135) Output:["_col0","_col1"] - Group By Operator [GBY_62] (rows=158394413 width=135) + Group By Operator [GBY_59] (rows=158394413 width=135) Output:["_col0"],keys:KEY._col0 - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_61] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_58] PartitionCols:_col0 - Group By Operator [GBY_60] (rows=316788826 width=135) + Group By Operator [GBY_57] (rows=316788826 width=135) Output:["_col0"],keys:_col1 - Merge Join Operator [MERGEJOIN_112] (rows=316788826 width=135) - Conds:RS_56._col0=RS_57._col0(Inner),Output:["_col1"] - <-Map 13 [SIMPLE_EDGE] - SHUFFLE [RS_57] + Merge Join Operator [MERGEJOIN_109] (rows=316788826 width=135) + Conds:RS_53._col0=RS_54._col0(Inner),Output:["_col1"] + <-Map 12 [SIMPLE_EDGE] + SHUFFLE [RS_54] PartitionCols:_col0 Select Operator [SEL_14] (rows=4058 width=1119) Output:["_col0"] - Filter Operator [FIL_103] (rows=4058 width=1119) + Filter Operator [FIL_100] (rows=4058 width=1119) predicate:((d_year = 1999) and d_date_sk is not null and d_moy BETWEEN 1 AND 3) TableScan [TS_12] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] - <-Map 19 [SIMPLE_EDGE] - SHUFFLE [RS_56] + <-Map 18 [SIMPLE_EDGE] + SHUFFLE [RS_53] PartitionCols:_col0 - Select Operator [SEL_52] (rows=287989836 width=135) + Select Operator [SEL_49] (rows=287989836 width=135) Output:["_col0","_col1"] - Filter Operator [FIL_106] (rows=287989836 width=135) + Filter Operator [FIL_103] (rows=287989836 width=135) predicate:(cs_ship_customer_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_50] (rows=287989836 width=135) + TableScan [TS_47] (rows=287989836 width=135) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_ship_customer_sk"] <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_64] + SHUFFLE [RS_61] PartitionCols:_col0 - Select Operator [SEL_49] (rows=348477374 width=88) + Select Operator [SEL_46] (rows=696954748 width=88) Output:["_col0","_col6","_col7","_col8","_col9","_col10"] - Filter Operator [FIL_48] (rows=348477374 width=88) - predicate:_col13 is null - Select Operator [SEL_47] (rows=696954748 width=88) - Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col13"] - Merge Join Operator [MERGEJOIN_113] (rows=696954748 width=88) - Conds:RS_43._col0=RS_44._col0(Left Outer),RS_43._col0=RS_45._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col12"] - <-Reducer 12 [ONE_TO_ONE_EDGE] - FORWARD [RS_44] - PartitionCols:_col0 - Select Operator [SEL_22] (rows=79201469 width=135) - Output:["_col0","_col1"] - Group By Operator [GBY_21] (rows=79201469 width=135) - Output:["_col0"],keys:KEY._col0 - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_20] + Filter Operator [FIL_45] (rows=696954748 width=88) + predicate:_col12 is null + Merge Join Operator [MERGEJOIN_110] (rows=1393909496 width=88) + Conds:RS_41._col0=RS_42._col0(Left Semi),RS_41._col0=RS_43._col0(Left Outer),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col12"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_42] + PartitionCols:_col0 + Group By Operator [GBY_40] (rows=633595212 width=88) + Output:["_col0"],keys:_col0 + Select Operator [SEL_18] (rows=633595212 width=88) + Output:["_col0"] + Merge Join Operator [MERGEJOIN_107] (rows=633595212 width=88) + Conds:RS_15._col0=RS_16._col0(Inner),Output:["_col1"] + <-Map 12 [SIMPLE_EDGE] + SHUFFLE [RS_16] PartitionCols:_col0 - Group By Operator [GBY_19] (rows=158402938 width=135) - Output:["_col0"],keys:_col1 - Merge Join Operator [MERGEJOIN_110] (rows=158402938 width=135) - Conds:RS_15._col0=RS_16._col0(Inner),Output:["_col1"] - <-Map 13 [SIMPLE_EDGE] - SHUFFLE [RS_16] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_14] - <-Map 10 [SIMPLE_EDGE] - SHUFFLE [RS_15] - PartitionCols:_col0 - Select Operator [SEL_11] (rows=144002668 width=135) - Output:["_col0","_col1"] - Filter Operator [FIL_102] (rows=144002668 width=135) - predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_9] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] - <-Reducer 15 [ONE_TO_ONE_EDGE] - FORWARD [RS_45] - PartitionCols:_col0 - Group By Operator [GBY_35] (rows=316797606 width=88) + Please refer to the previous Select Operator [SEL_14] + <-Map 10 [SIMPLE_EDGE] + SHUFFLE [RS_15] + PartitionCols:_col0 + Select Operator [SEL_11] (rows=575995635 width=88) + Output:["_col0","_col1"] + Filter Operator [FIL_99] (rows=575995635 width=88) + predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_9] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"] + <-Reducer 14 [ONE_TO_ONE_EDGE] + FORWARD [RS_43] + PartitionCols:_col0 + Select Operator [SEL_32] (rows=79201469 width=135) + Output:["_col0","_col1"] + Group By Operator [GBY_31] (rows=79201469 width=135) Output:["_col0"],keys:KEY._col0 - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_34] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_30] PartitionCols:_col0 - Group By Operator [GBY_33] (rows=633595212 width=88) + Group By Operator [GBY_29] (rows=158402938 width=135) Output:["_col0"],keys:_col1 - Merge Join Operator [MERGEJOIN_111] (rows=633595212 width=88) - Conds:RS_29._col0=RS_30._col0(Inner),Output:["_col1"] - <-Map 13 [SIMPLE_EDGE] - SHUFFLE [RS_30] + Merge Join Operator [MERGEJOIN_108] (rows=158402938 width=135) + Conds:RS_25._col0=RS_26._col0(Inner),Output:["_col1"] + <-Map 12 [SIMPLE_EDGE] + SHUFFLE [RS_26] PartitionCols:_col0 Please refer to the previous Select Operator [SEL_14] - <-Map 18 [SIMPLE_EDGE] - SHUFFLE [RS_29] + <-Map 17 [SIMPLE_EDGE] + SHUFFLE [RS_25] PartitionCols:_col0 - Select Operator [SEL_25] (rows=575995635 width=88) + Select Operator [SEL_21] (rows=144002668 width=135) Output:["_col0","_col1"] - Filter Operator [FIL_104] (rows=575995635 width=88) - predicate:(ss_customer_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_23] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_43] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_109] (rows=96800003 width=860) - Conds:RS_40._col1=RS_41._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10"] - <-Map 9 [SIMPLE_EDGE] - SHUFFLE [RS_41] - PartitionCols:_col0 - Select Operator [SEL_8] (rows=1861800 width=385) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_101] (rows=1861800 width=385) - predicate:cd_demo_sk is not null - TableScan [TS_6] (rows=1861800 width=385) - default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status","cd_purchase_estimate","cd_credit_rating"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_40] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_108] (rows=88000001 width=860) - Conds:RS_37._col2=RS_38._col0(Inner),Output:["_col0","_col1"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_37] - PartitionCols:_col2 - Select Operator [SEL_2] (rows=80000000 width=860) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_99] (rows=80000000 width=860) - predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null) - TableScan [TS_0] (rows=80000000 width=860) - default@customer,c,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"] - <-Map 8 [SIMPLE_EDGE] - SHUFFLE [RS_38] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=20000000 width=1014) - Output:["_col0"] - Filter Operator [FIL_100] (rows=20000000 width=1014) - predicate:((ca_state) IN ('CO', 'IL', 'MN') and ca_address_sk is not null) - TableScan [TS_3] (rows=40000000 width=1014) - default@customer_address,ca,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] + Filter Operator [FIL_101] (rows=144002668 width=135) + predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_19] (rows=144002668 width=135) + default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_41] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_106] (rows=96800003 width=860) + Conds:RS_36._col1=RS_37._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10"] + <-Map 9 [SIMPLE_EDGE] + SHUFFLE [RS_37] + PartitionCols:_col0 + Select Operator [SEL_8] (rows=1861800 width=385) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_98] (rows=1861800 width=385) + predicate:cd_demo_sk is not null + TableScan [TS_6] (rows=1861800 width=385) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status","cd_purchase_estimate","cd_credit_rating"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_36] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_105] (rows=88000001 width=860) + Conds:RS_33._col2=RS_34._col0(Inner),Output:["_col0","_col1"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_33] + PartitionCols:_col2 + Select Operator [SEL_2] (rows=80000000 width=860) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_96] (rows=80000000 width=860) + predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_customer_sk is not null) + TableScan [TS_0] (rows=80000000 width=860) + default@customer,c,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"] + <-Map 8 [SIMPLE_EDGE] + SHUFFLE [RS_34] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=20000000 width=1014) + Output:["_col0"] + Filter Operator [FIL_97] (rows=20000000 width=1014) + predicate:((ca_state) IN ('CO', 'IL', 'MN') and ca_address_sk is not null) + TableScan [TS_3] (rows=40000000 width=1014) + default@customer_address,ca,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query94.q.out b/ql/src/test/results/clientpositive/perf/tez/query94.q.out index 7674aa7f7c..1c9d16b01d 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query94.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query94.q.out @@ -1,4 +1,3 @@ -Warning: Shuffle Join MERGEJOIN[113][tables = [$hdt$_2, $hdt$_3, $hdt$_1, $hdt$_4]] in Stage 'Reducer 18' is a cross product PREHOOK: query: explain select count(distinct ws_order_number) as `order count` @@ -59,179 +58,125 @@ Plan optimized by CBO. Vertex dependency in root stage Reducer 14 <- Map 13 (SIMPLE_EDGE) -Reducer 16 <- Map 15 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) -Reducer 17 <- Reducer 16 (SIMPLE_EDGE) -Reducer 18 <- Map 15 (CUSTOM_SIMPLE_EDGE), Map 20 (CUSTOM_SIMPLE_EDGE), Map 21 (CUSTOM_SIMPLE_EDGE), Map 22 (CUSTOM_SIMPLE_EDGE) -Reducer 19 <- Reducer 18 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) -Reducer 3 <- Map 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 14 (ONE_TO_ONE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 17 (ONE_TO_ONE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 12 (SIMPLE_EDGE), Reducer 14 (ONE_TO_ONE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Reducer 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 9 - File Output Operator [FS_74] - Limit [LIM_72] (rows=1 width=344) + Reducer 8 + File Output Operator [FS_49] + Limit [LIM_47] (rows=1 width=344) Number of rows:100 - Select Operator [SEL_71] (rows=1 width=344) + Select Operator [SEL_46] (rows=1 width=344) Output:["_col0","_col1","_col2"] - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_70] - Select Operator [SEL_69] (rows=1 width=344) + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_45] + Select Operator [SEL_44] (rows=1 width=344) Output:["_col1","_col2","_col3"] - Group By Operator [GBY_112] (rows=1 width=344) + Group By Operator [GBY_77] (rows=1 width=344) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] - <-Reducer 7 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_111] - Group By Operator [GBY_110] (rows=1 width=344) + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_76] + Group By Operator [GBY_75] (rows=1 width=344) Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"] - Group By Operator [GBY_109] (rows=1395035081047425024 width=1) + Group By Operator [GBY_74] (rows=210834322 width=135) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_108] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_73] PartitionCols:_col0 - Group By Operator [GBY_107] (rows=1395035081047425024 width=1) + Group By Operator [GBY_72] (rows=210834322 width=135) Output:["_col0","_col2","_col3"],aggregations:["sum(_col5)","sum(_col6)"],keys:_col4 - Select Operator [SEL_65] (rows=1395035081047425024 width=1) + Select Operator [SEL_40] (rows=210834322 width=135) Output:["_col4","_col5","_col6"] - Filter Operator [FIL_64] (rows=1395035081047425024 width=1) + Filter Operator [FIL_39] (rows=210834322 width=135) predicate:_col16 is null - Select Operator [SEL_63] (rows=2790070162094850048 width=1) + Select Operator [SEL_38] (rows=421668645 width=135) Output:["_col4","_col5","_col6","_col16"] - Merge Join Operator [MERGEJOIN_119] (rows=2790070162094850048 width=1) - Conds:RS_60._col3, _col4=RS_61._col0, _col1(Inner),Output:["_col4","_col5","_col6","_col14"] - <-Reducer 17 [ONE_TO_ONE_EDGE] - FORWARD [RS_61] - PartitionCols:_col0, _col1 - Group By Operator [GBY_46] (rows=2536427365110644736 width=1) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_45] - PartitionCols:_col0, _col1 - Group By Operator [GBY_44] (rows=5072854730221289472 width=1) - Output:["_col0","_col1"],keys:_col2, _col3 - Select Operator [SEL_43] (rows=5072854730221289472 width=1) - Output:["_col2","_col3"] - Filter Operator [FIL_42] (rows=5072854730221289472 width=1) - predicate:(_col2 <> _col0) - Merge Join Operator [MERGEJOIN_117] (rows=5072854730221289472 width=1) - Conds:RS_39._col1=RS_40._col1(Inner),Output:["_col0","_col2","_col3"] - <-Map 15 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_39] - PartitionCols:_col1 - Select Operator [SEL_20] (rows=144002668 width=135) - Output:["_col0","_col1"] - TableScan [TS_19] (rows=144002668 width=135) - default@web_sales,ws2,Tbl:COMPLETE,Col:NONE,Output:["ws_warehouse_sk","ws_order_number"] - <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_40] - PartitionCols:_col1 - Select Operator [SEL_38] (rows=4611686018427387903 width=1) - Output:["_col0","_col1"] - Group By Operator [GBY_37] (rows=4611686018427387903 width=1) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Reducer 18 [SIMPLE_EDGE] - SHUFFLE [RS_36] - PartitionCols:_col0, _col1 - Group By Operator [GBY_35] (rows=9223372036854775807 width=1) - Output:["_col0","_col1"],keys:_col4, _col3 - Merge Join Operator [MERGEJOIN_113] (rows=9223372036854775807 width=1) - Conds:(Inner),(Inner),(Inner),Output:["_col3","_col4"] - <-Map 15 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_32] - Please refer to the previous Select Operator [SEL_20] - <-Map 20 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_29] - Select Operator [SEL_22] (rows=73049 width=4) - TableScan [TS_21] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE - <-Map 21 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_30] - Select Operator [SEL_24] (rows=84 width=4) - TableScan [TS_23] (rows=84 width=1850) - default@web_site,web_site,Tbl:COMPLETE,Col:COMPLETE - <-Map 22 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_31] - Select Operator [SEL_26] (rows=40000000 width=4) - TableScan [TS_25] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_60] - PartitionCols:_col3, _col4 - Merge Join Operator [MERGEJOIN_118] (rows=210834322 width=135) - Conds:RS_57._col4=RS_58._col0(Left Outer),Output:["_col3","_col4","_col5","_col6","_col14"] - <-Reducer 14 [ONE_TO_ONE_EDGE] - FORWARD [RS_58] + Merge Join Operator [MERGEJOIN_81] (rows=421668645 width=135) + Conds:RS_34._col4=RS_35._col0(Left Semi),RS_34._col4=RS_36._col0(Left Outer),Output:["_col3","_col4","_col5","_col6","_col14","_col16"],residual filter predicates:{(_col3 <> _col14)} + <-Map 12 [SIMPLE_EDGE] + SHUFFLE [RS_35] + PartitionCols:_col0 + Group By Operator [GBY_33] (rows=144002668 width=135) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_24] (rows=144002668 width=135) + Output:["_col0","_col1"] + Filter Operator [FIL_70] (rows=144002668 width=135) + predicate:(ws_order_number is not null and ws_warehouse_sk is not null) + TableScan [TS_22] (rows=144002668 width=135) + default@web_sales,ws2,Tbl:COMPLETE,Col:NONE,Output:["ws_warehouse_sk","ws_order_number"] + <-Reducer 14 [ONE_TO_ONE_EDGE] + FORWARD [RS_36] + PartitionCols:_col0 + Select Operator [SEL_31] (rows=7199233 width=92) + Output:["_col0","_col1"] + Group By Operator [GBY_30] (rows=7199233 width=92) + Output:["_col0"],keys:KEY._col0 + <-Map 13 [SIMPLE_EDGE] + SHUFFLE [RS_29] + PartitionCols:_col0 + Group By Operator [GBY_28] (rows=14398467 width=92) + Output:["_col0"],keys:wr_order_number + Filter Operator [FIL_71] (rows=14398467 width=92) + predicate:wr_order_number is not null + TableScan [TS_25] (rows=14398467 width=92) + default@web_returns,wr1,Tbl:COMPLETE,Col:NONE,Output:["wr_order_number"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_34] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_80] (rows=191667562 width=135) + Conds:RS_18._col2=RS_19._col0(Inner),Output:["_col3","_col4","_col5","_col6"] + <-Map 11 [SIMPLE_EDGE] + SHUFFLE [RS_19] PartitionCols:_col0 - Select Operator [SEL_18] (rows=7199233 width=92) - Output:["_col0","_col1"] - Group By Operator [GBY_17] (rows=7199233 width=92) - Output:["_col0"],keys:KEY._col0 - <-Map 13 [SIMPLE_EDGE] - SHUFFLE [RS_16] - PartitionCols:_col0 - Group By Operator [GBY_15] (rows=14398467 width=92) - Output:["_col0"],keys:wr_order_number - Filter Operator [FIL_104] (rows=14398467 width=92) - predicate:wr_order_number is not null - TableScan [TS_12] (rows=14398467 width=92) - default@web_returns,wr1,Tbl:COMPLETE,Col:NONE,Output:["wr_order_number"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_57] - PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_116] (rows=191667562 width=135) - Conds:RS_54._col2=RS_55._col0(Inner),Output:["_col3","_col4","_col5","_col6"] - <-Map 12 [SIMPLE_EDGE] - SHUFFLE [RS_55] + Select Operator [SEL_11] (rows=42 width=1850) + Output:["_col0"] + Filter Operator [FIL_69] (rows=42 width=1850) + predicate:((web_company_name = 'pri') and web_site_sk is not null) + TableScan [TS_9] (rows=84 width=1850) + default@web_site,web_site,Tbl:COMPLETE,Col:NONE,Output:["web_site_sk","web_company_name"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_79] (rows=174243235 width=135) + Conds:RS_15._col1=RS_16._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"] + <-Map 10 [SIMPLE_EDGE] + SHUFFLE [RS_16] PartitionCols:_col0 - Select Operator [SEL_11] (rows=42 width=1850) + Select Operator [SEL_8] (rows=20000000 width=1014) Output:["_col0"] - Filter Operator [FIL_103] (rows=42 width=1850) - predicate:((web_company_name = 'pri') and web_site_sk is not null) - TableScan [TS_9] (rows=84 width=1850) - default@web_site,web_site,Tbl:COMPLETE,Col:NONE,Output:["web_site_sk","web_company_name"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_54] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_115] (rows=174243235 width=135) - Conds:RS_51._col1=RS_52._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"] - <-Map 11 [SIMPLE_EDGE] - SHUFFLE [RS_52] + Filter Operator [FIL_68] (rows=20000000 width=1014) + predicate:((ca_state = 'TX') and ca_address_sk is not null) + TableScan [TS_6] (rows=40000000 width=1014) + default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_15] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_78] (rows=158402938 width=135) + Conds:RS_12._col0=RS_13._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_12] PartitionCols:_col0 - Select Operator [SEL_8] (rows=20000000 width=1014) + Select Operator [SEL_2] (rows=144002668 width=135) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Filter Operator [FIL_66] (rows=144002668 width=135) + predicate:(ws_order_number is not null and ws_ship_addr_sk is not null and ws_ship_date_sk is not null and ws_web_site_sk is not null) + TableScan [TS_0] (rows=144002668 width=135) + default@web_sales,ws1,Tbl:COMPLETE,Col:NONE,Output:["ws_ship_date_sk","ws_ship_addr_sk","ws_web_site_sk","ws_warehouse_sk","ws_order_number","ws_ext_ship_cost","ws_net_profit"] + <-Map 9 [SIMPLE_EDGE] + SHUFFLE [RS_13] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=8116 width=1119) Output:["_col0"] - Filter Operator [FIL_102] (rows=20000000 width=1014) - predicate:((ca_state = 'TX') and ca_address_sk is not null) - TableScan [TS_6] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_51] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_114] (rows=158402938 width=135) - Conds:RS_48._col0=RS_49._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_48] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=144002668 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_100] (rows=144002668 width=135) - predicate:(ws_ship_addr_sk is not null and ws_ship_date_sk is not null and ws_web_site_sk is not null) - TableScan [TS_0] (rows=144002668 width=135) - default@web_sales,ws1,Tbl:COMPLETE,Col:NONE,Output:["ws_ship_date_sk","ws_ship_addr_sk","ws_web_site_sk","ws_warehouse_sk","ws_order_number","ws_ext_ship_cost","ws_net_profit"] - <-Map 10 [SIMPLE_EDGE] - SHUFFLE [RS_49] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=8116 width=1119) - Output:["_col0"] - Filter Operator [FIL_101] (rows=8116 width=1119) - predicate:(CAST( d_date AS TIMESTAMP) BETWEEN 1999-05-01 00:00:00.0 AND 1999-06-30 00:00:00.0 and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] + Filter Operator [FIL_67] (rows=8116 width=1119) + predicate:(CAST( d_date AS TIMESTAMP) BETWEEN 1999-05-01 00:00:00.0 AND 1999-06-30 00:00:00.0 and d_date_sk is not null) + TableScan [TS_3] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] diff --git a/ql/src/test/results/clientpositive/semijoin5.q.out b/ql/src/test/results/clientpositive/semijoin5.q.out index 533c077f58..e42c27f0c8 100644 --- a/ql/src/test/results/clientpositive/semijoin5.q.out +++ b/ql/src/test/results/clientpositive/semijoin5.q.out @@ -48,11 +48,10 @@ WHERE (t2.smallint_col_19) IN (SELECT POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-7 + Stage-2 depends on stages: Stage-1, Stage-6 Stage-3 depends on stages: Stage-2 Stage-4 depends on stages: Stage-3 Stage-6 is a root stage - Stage-7 depends on stages: Stage-6 Stage-0 depends on stages: Stage-4 STAGE PLANS: @@ -63,7 +62,7 @@ STAGE PLANS: alias: t1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (bigint_col_7 is not null and decimal2016_col_26 is not null and tinyint_col_3 is not null) (type: boolean) + predicate: (bigint_col_7 is not null and decimal2016_col_26 is not null and timestamp_col_9 is not null and tinyint_col_3 is not null) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: tinyint_col_3 (type: tinyint), bigint_col_7 (type: bigint), timestamp_col_9 (type: timestamp), double_col_16 (type: double), decimal2016_col_26 (type: decimal(20,16)), smallint_col_50 (type: smallint) @@ -79,7 +78,7 @@ STAGE PLANS: alias: t2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (decimal2709_col_9 is not null and tinyint_col_15 is not null and tinyint_col_20 is not null) (type: boolean) + predicate: (decimal2709_col_9 is not null and smallint_col_19 is not null and tinyint_col_15 is not null and tinyint_col_20 is not null) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: decimal2709_col_9 (type: decimal(27,9)), int_col_10 (type: int), tinyint_col_15 (type: tinyint), smallint_col_19 (type: smallint), tinyint_col_20 (type: tinyint) @@ -119,17 +118,17 @@ STAGE PLANS: value expressions: _col3 (type: double), _col5 (type: smallint), _col7 (type: int) TableScan Reduce Output Operator - key expressions: _col1 (type: timestamp), -92 (type: int) + key expressions: _col0 (type: timestamp), _col1 (type: int) sort order: ++ - Map-reduce partition columns: _col1 (type: timestamp), -92 (type: int) + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col2 (type: timestamp), UDFToInteger(_col9) (type: int) - 1 _col1 (type: timestamp), -92 (type: int) + 1 _col0 (type: timestamp), _col1 (type: int) outputColumnNames: _col3, _col5, _col7 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator @@ -276,43 +275,21 @@ STAGE PLANS: 1 _col0 (type: decimal(26,12)) outputColumnNames: _col2 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Group By Operator - keys: _col2 (type: timestamp) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-7 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: timestamp) - sort order: + - Map-reduce partition columns: _col0 (type: timestamp) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: timestamp) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col0 (type: timestamp) - outputColumnNames: _col1 + expressions: _col2 (type: timestamp), -92 (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + keys: _col0 (type: timestamp), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out b/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out index b89f9f5905..10a880f999 100644 --- a/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out +++ b/ql/src/test/results/clientpositive/spark/constprog_partitioner.q.out @@ -104,7 +104,7 @@ STAGE PLANS: alias: li Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (l_linenumber = 1) (type: boolean) + predicate: ((l_linenumber = 1) and l_orderkey is not null) (type: boolean) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int) @@ -122,10 +122,10 @@ STAGE PLANS: alias: lineitem Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((l_linenumber = l_linenumber) and (l_shipmode = 'AIR')) (type: boolean) + predicate: ((l_linenumber = 1) and (l_shipmode = 'AIR') and l_orderkey is not null) (type: boolean) Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: l_orderkey (type: int), l_linenumber (type: int) + expressions: l_orderkey (type: int), 1 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE Group By Operator diff --git a/ql/src/test/results/clientpositive/spark/constprog_semijoin.q.out b/ql/src/test/results/clientpositive/spark/constprog_semijoin.q.out index 1c6e38002d..d116f6acd8 100644 --- a/ql/src/test/results/clientpositive/spark/constprog_semijoin.q.out +++ b/ql/src/test/results/clientpositive/spark/constprog_semijoin.q.out @@ -172,14 +172,32 @@ STAGE PLANS: outputColumnNames: _col0, _col2 Statistics: Num rows: 5 Data size: 100 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col2 (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 5 Data size: 100 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) + value expressions: _col2 (type: int) Map 4 Map Operator Tree: TableScan + alias: table2 + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: int), val2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 5 + Map Operator Tree: + TableScan alias: table3 Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -199,46 +217,29 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE - Map 5 - Map Operator Tree: - TableScan - alias: table2 - Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: id is not null (type: boolean) - Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: id (type: int), val2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: - 0 _col2 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0 + outputColumnNames: _col0, _col2, _col4 Statistics: Num rows: 5 Data size: 110 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col2 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 5 Data size: 110 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col4 (type: string) Reducer 3 Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col4 Statistics: Num rows: 5 Data size: 121 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out b/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out index 76c74d9ab7..8ae90805e0 100644 --- a/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out +++ b/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out @@ -1693,11 +1693,11 @@ Stage-0 Stage-1 Reducer 2 File Output Operator [FS_19] - Select Operator [SEL_18] (rows=434 width=178) + Select Operator [SEL_18] (rows=366 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_17] (rows=434 width=178) + Filter Operator [FIL_17] (rows=366 width=179) predicate:_col3 is null - Join Operator [JOIN_16] (rows=500 width=178) + Join Operator [JOIN_16] (rows=500 width=179) Output:["_col0","_col1","_col3"],condition map:[{"":"{\"type\":\"Left Outer\",\"left\":0,\"right\":1}"}],keys:{"0":"_col1","1":"_col0"} <-Map 1 [PARTITION-LEVEL SORT] PARTITION-LEVEL SORT [RS_14] @@ -1709,21 +1709,21 @@ Stage-0 <-Reducer 4 [PARTITION-LEVEL SORT] PARTITION-LEVEL SORT [RS_15] PartitionCols:_col0 - Select Operator [SEL_13] (rows=41 width=95) + Select Operator [SEL_13] (rows=83 width=95) Output:["_col0","_col1"] - Group By Operator [GBY_12] (rows=41 width=91) + Group By Operator [GBY_12] (rows=83 width=91) Output:["_col0"],keys:_col1 - Select Operator [SEL_8] (rows=41 width=178) + Select Operator [SEL_8] (rows=83 width=178) Output:["_col1"] - Group By Operator [GBY_7] (rows=41 width=178) + Group By Operator [GBY_7] (rows=83 width=178) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Map 3 [GROUP PARTITION-LEVEL SORT] GROUP PARTITION-LEVEL SORT [RS_6] PartitionCols:_col0 - Group By Operator [GBY_5] (rows=41 width=178) + Group By Operator [GBY_5] (rows=83 width=178) Output:["_col0","_col1"],keys:value, key - Filter Operator [FIL_20] (rows=83 width=178) - predicate:((value = value) and (value > 'val_2')) + Filter Operator [FIL_20] (rows=166 width=178) + predicate:(value > 'val_2') TableScan [TS_2] (rows=500 width=178) default@src_cbo,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] @@ -1758,11 +1758,11 @@ Stage-0 Stage-1 Reducer 3 File Output Operator [FS_18] - Select Operator [SEL_17] (rows=234 width=178) + Select Operator [SEL_17] (rows=183 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_16] (rows=234 width=178) + Filter Operator [FIL_16] (rows=183 width=179) predicate:_col4 is null - Join Operator [JOIN_15] (rows=250 width=178) + Join Operator [JOIN_15] (rows=250 width=179) Output:["_col0","_col1","_col4"],condition map:[{"":"{\"type\":\"Left Outer\",\"left\":0,\"right\":1}"}],keys:{"0":"_col0, _col1","1":"_col0, _col1"} <-Reducer 2 [PARTITION-LEVEL SORT] PARTITION-LEVEL SORT [RS_13] @@ -1781,17 +1781,17 @@ Stage-0 <-Reducer 5 [PARTITION-LEVEL SORT] PARTITION-LEVEL SORT [RS_14] PartitionCols:_col0, _col1 - Select Operator [SEL_12] (rows=20 width=182) + Select Operator [SEL_12] (rows=83 width=182) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_11] (rows=20 width=178) + Group By Operator [GBY_11] (rows=83 width=178) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Map 4 [GROUP] GROUP [RS_10] PartitionCols:_col0, _col1 - Group By Operator [GBY_9] (rows=20 width=178) + Group By Operator [GBY_9] (rows=83 width=178) Output:["_col0","_col1"],keys:key, value - Filter Operator [FIL_19] (rows=41 width=178) - predicate:((key = key) and (value = value) and (value > 'val_12')) + Filter Operator [FIL_19] (rows=166 width=178) + predicate:((value > 'val_12') and key is not null) TableScan [TS_6] (rows=500 width=178) default@src_cbo,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] @@ -1834,7 +1834,7 @@ Stage-0 Stage-1 Reducer 2 File Output Operator [FS_11] - Join Operator [JOIN_9] (rows=32 width=178) + Join Operator [JOIN_9] (rows=134 width=178) Output:["_col0","_col1"],condition map:[{"":"{\"type\":\"Left Semi\",\"left\":0,\"right\":1}"}],keys:{"0":"_col0, _col1","1":"_col0, _col1"} <-Map 1 [PARTITION-LEVEL SORT] PARTITION-LEVEL SORT [RS_7] @@ -1846,12 +1846,12 @@ Stage-0 <-Map 3 [PARTITION-LEVEL SORT] PARTITION-LEVEL SORT [RS_8] PartitionCols:_col0, _col1 - Group By Operator [GBY_6] (rows=20 width=178) + Group By Operator [GBY_6] (rows=83 width=178) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_4] (rows=41 width=178) + Select Operator [SEL_4] (rows=166 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_12] (rows=41 width=178) - predicate:((key = key) and (value = value) and (value > 'val_9')) + Filter Operator [FIL_12] (rows=166 width=178) + predicate:((value > 'val_9') and key is not null) TableScan [TS_2] (rows=500 width=178) default@src_cbo,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] @@ -1884,7 +1884,7 @@ Stage-0 Stage-1 Reducer 2 File Output Operator [FS_11] - Join Operator [JOIN_9] (rows=32 width=178) + Join Operator [JOIN_9] (rows=134 width=178) Output:["_col0","_col1"],condition map:[{"":"{\"type\":\"Left Semi\",\"left\":0,\"right\":1}"}],keys:{"0":"_col0, _col1","1":"_col0, _col1"} <-Map 1 [PARTITION-LEVEL SORT] PARTITION-LEVEL SORT [RS_7] @@ -1896,12 +1896,12 @@ Stage-0 <-Map 3 [PARTITION-LEVEL SORT] PARTITION-LEVEL SORT [RS_8] PartitionCols:_col0, _col1 - Group By Operator [GBY_6] (rows=20 width=178) + Group By Operator [GBY_6] (rows=83 width=178) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_4] (rows=41 width=178) + Select Operator [SEL_4] (rows=166 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_12] (rows=41 width=178) - predicate:((key = key) and (value = value) and (value > 'val_9')) + Filter Operator [FIL_12] (rows=166 width=178) + predicate:((value > 'val_9') and key is not null) TableScan [TS_2] (rows=500 width=178) default@src_cbo,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] @@ -1977,12 +1977,12 @@ Stage-0 <-Map 6 [PARTITION-LEVEL SORT] PARTITION-LEVEL SORT [RS_19] PartitionCols:_col0, _col1 - Group By Operator [GBY_17] (rows=2 width=8) + Group By Operator [GBY_17] (rows=4 width=8) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_12] (rows=7 width=8) + Select Operator [SEL_12] (rows=14 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_25] (rows=7 width=96) - predicate:((l_linenumber = l_linenumber) and (l_shipmode = 'AIR')) + Filter Operator [FIL_25] (rows=14 width=96) + predicate:((l_shipmode = 'AIR') and l_linenumber is not null) TableScan [TS_10] (rows=100 width=96) default@lineitem,lineitem,Tbl:COMPLETE,Col:COMPLETE,Output:["l_orderkey","l_linenumber","l_shipmode"] <-Reducer 3 [PARTITION-LEVEL SORT] @@ -2272,14 +2272,14 @@ Stage-0 File Output Operator [FS_26] Select Operator [SEL_25] (rows=13 width=223) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_24] (rows=13 width=227) + Filter Operator [FIL_24] (rows=13 width=231) predicate:(not CASE WHEN ((_col4 = 0)) THEN (false) WHEN (_col4 is null) THEN (false) WHEN (_col8 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col5 < _col4)) THEN (true) ELSE (false) END) - Join Operator [JOIN_23] (rows=26 width=227) + Join Operator [JOIN_23] (rows=26 width=230) Output:["_col0","_col1","_col2","_col4","_col5","_col8"],condition map:[{"":"{\"type\":\"Left Outer\",\"left\":0,\"right\":1}"}],keys:{"0":"_col0, _col1","1":"_col0, _col1"} <-Reducer 2 [PARTITION-LEVEL SORT] PARTITION-LEVEL SORT [RS_21] PartitionCols:_col0, _col1 - Join Operator [JOIN_20] (rows=26 width=226) + Join Operator [JOIN_20] (rows=26 width=229) Output:["_col0","_col1","_col2","_col4","_col5"],condition map:[{"":"{\"type\":\"Left Outer\",\"left\":0,\"right\":1}"}],keys:{"0":"_col1","1":"_col0"} <-Map 1 [PARTITION-LEVEL SORT] PARTITION-LEVEL SORT [RS_18] @@ -2291,37 +2291,37 @@ Stage-0 <-Reducer 5 [PARTITION-LEVEL SORT] PARTITION-LEVEL SORT [RS_19] PartitionCols:_col0 - Group By Operator [GBY_7] (rows=1 width=114) + Group By Operator [GBY_7] (rows=2 width=114) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 <-Map 4 [GROUP] GROUP [RS_6] PartitionCols:_col0 - Group By Operator [GBY_5] (rows=1 width=114) + Group By Operator [GBY_5] (rows=2 width=114) Output:["_col0","_col1","_col2"],aggregations:["count()","count(p_name)"],keys:p_mfgr - Select Operator [SEL_4] (rows=4 width=223) + Select Operator [SEL_4] (rows=8 width=223) Output:["p_name","p_mfgr"] - Filter Operator [FIL_27] (rows=4 width=223) - predicate:((p_mfgr = p_mfgr) and (p_size < 10)) + Filter Operator [FIL_27] (rows=8 width=223) + predicate:((p_size < 10) and p_mfgr is not null) TableScan [TS_2] (rows=26 width=223) default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_name","p_mfgr","p_size"] <-Reducer 7 [PARTITION-LEVEL SORT] PARTITION-LEVEL SORT [RS_22] PartitionCols:_col0, _col1 - Select Operator [SEL_17] (rows=2 width=223) + Select Operator [SEL_17] (rows=4 width=223) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_16] (rows=2 width=219) + Filter Operator [FIL_16] (rows=4 width=219) predicate:_col0 is not null - Group By Operator [GBY_14] (rows=2 width=219) + Group By Operator [GBY_14] (rows=4 width=219) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Map 6 [GROUP] GROUP [RS_13] PartitionCols:_col0, _col1 - Group By Operator [GBY_12] (rows=2 width=219) + Group By Operator [GBY_12] (rows=4 width=219) Output:["_col0","_col1"],keys:p_name, p_mfgr - Select Operator [SEL_11] (rows=4 width=223) + Select Operator [SEL_11] (rows=8 width=223) Output:["p_name","p_mfgr"] - Filter Operator [FIL_28] (rows=4 width=223) - predicate:((p_mfgr = p_mfgr) and (p_size < 10)) + Filter Operator [FIL_28] (rows=8 width=223) + predicate:((p_size < 10) and p_mfgr is not null) TableScan [TS_9] (rows=26 width=223) default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_name","p_mfgr","p_size"] @@ -2457,7 +2457,7 @@ Stage-0 Select Operator [SEL_24] (rows=1 width=110) Output:["_col0","_col1"] Filter Operator [FIL_23] (rows=1 width=114) - predicate:(((_col2 - _col1) > 600.0) and (_col1 = _col1)) + predicate:(((_col2 - _col1) > 600.0) and _col1 is not null) Group By Operator [GBY_22] (rows=5 width=114) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)"],keys:KEY._col0 <-Map 6 [GROUP] @@ -2501,7 +2501,7 @@ Stage-0 Select Operator [SEL_12] (rows=1 width=114) Output:["_col0","_col1"] Filter Operator [FIL_11] (rows=1 width=114) - predicate:(((_col2 - _col1) > 600.0) and (_col1 = _col1)) + predicate:(((_col2 - _col1) > 600.0) and _col1 is not null) Group By Operator [GBY_10] (rows=5 width=114) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)"],keys:KEY._col0 <- Please refer to the previous Map 6 [GROUP] diff --git a/ql/src/test/results/clientpositive/spark/subquery_exists.q.out b/ql/src/test/results/clientpositive/spark/subquery_exists.q.out index dafe5b6d5b..fb13fb73e9 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_exists.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_exists.q.out @@ -32,37 +32,40 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((value > 'val_9') and key is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Map 3 Map Operator Tree: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key = key) and (value = value) and (value > 'val_9')) (type: boolean) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + predicate: ((value > 'val_9') and key is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -72,10 +75,10 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -230,16 +233,19 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Map 3 Map Operator Tree: TableScan @@ -1048,13 +1054,13 @@ POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@tx1 PREHOOK: query: insert into tx1 values (1, 1), - (1, 2), - (1, 3) + (1, 2), + (1, 3) PREHOOK: type: QUERY PREHOOK: Output: default@tx1 POSTHOOK: query: insert into tx1 values (1, 1), - (1, 2), - (1, 3) + (1, 2), + (1, 3) POSTHOOK: type: QUERY POSTHOOK: Output: default@tx1 POSTHOOK: Lineage: tx1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] @@ -1084,10 +1090,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) - Reducer 7 <- Map 6 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -1095,67 +1099,64 @@ STAGE PLANS: TableScan alias: u Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: a (type: int), b (type: int) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: a is not null (type: boolean) Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Select Operator + expressions: a (type: int), b (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Map 4 Map Operator Tree: TableScan alias: v Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: a (type: int), b (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) - Map 6 - Map Operator Tree: - TableScan - alias: u - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: a (type: int), b (type: int) - outputColumnNames: a, b + Filter Operator + predicate: (a is not null and b is not null) (type: boolean) Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: a (type: int), b (type: int) - mode: hash + Select Operator + expressions: a (type: int), b (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Group By Operator + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Reducer 2 Reduce Operator Tree: Join Operator condition map: Left Semi Join 0 to 1 keys: - 0 _col0 (type: int), _col1 (type: int) - 1 _col0 (type: int), _col1 (type: int) + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3 + residual filter predicates: {(_col1 <> _col3)} Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Select Operator + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -1174,46 +1175,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col3 <> _col1) (type: boolean) - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE - Reducer 7 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) Stage: Stage-0 Fetch Operator @@ -1261,7 +1222,6 @@ POSTHOOK: type: QUERY POSTHOOK: Output: default@t2 POSTHOOK: Lineage: t2.i EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: t2.j EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -Warning: Shuffle Join JOIN[12][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 4' is a cross product PREHOOK: query: explain select * from t1 where t1.i in (select t2.i from t2 where t2.j <> t1.j) PREHOOK: type: QUERY POSTHOOK: query: explain select * from t1 where t1.i in (select t2.i from t2 where t2.j <> t1.j) @@ -1274,9 +1234,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) - Reducer 6 <- Map 5 (GROUP, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -1284,102 +1242,64 @@ STAGE PLANS: TableScan alias: t1 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i (type: int), j (type: int) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: i is not null (type: boolean) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Select Operator + expressions: i (type: int), j (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Map 3 Map Operator Tree: TableScan alias: t2 Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i (type: int), j (type: int) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: (i is not null and j is not null) (type: boolean) Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Select Operator + expressions: i (type: int), j (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int) - Map 5 - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: j (type: int) - outputColumnNames: j - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: j (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Reducer 2 Reduce Operator Tree: Join Operator condition map: Left Semi Join 0 to 1 keys: - 0 _col0 (type: int), _col1 (type: int) - 1 _col0 (type: int), _col1 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col1 <> _col2) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col2 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Reducer 6 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + residual filter predicates: {(_col1 <> _col3)} + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1387,7 +1307,6 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[12][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 4' is a cross product PREHOOK: query: select * from t1 where t1.i in (select t2.i from t2 where t2.j <> t1.j) PREHOOK: type: QUERY PREHOOK: Input: default@t1 diff --git a/ql/src/test/results/clientpositive/spark/subquery_in.q.out b/ql/src/test/results/clientpositive/spark/subquery_in.q.out index 471c2ccd94..fd25e36fba 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_in.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_in.q.out @@ -140,37 +140,40 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > '9') and value is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Map 3 Map Operator Tree: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '9') and (value = value)) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + predicate: ((key > '9') and value is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -180,10 +183,10 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -424,38 +427,41 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) - outputColumnNames: _col0, _col1, _col2 + Filter Operator + predicate: (p_mfgr is not null and p_size is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col2 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col2 (type: int) + Select Operator + expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Map 3 Map Operator Tree: TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_mfgr = p_mfgr) (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + predicate: p_mfgr is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: p_mfgr (type: string), p_size (type: int) sort order: ++ Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Reducer 2 Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col1 (type: string), _col2 (type: int) - 1 _col1 (type: string), _col0 (type: int) + 1 _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -474,7 +480,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) outputColumnNames: _col2, _col5 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition @@ -495,25 +501,25 @@ STAGE PLANS: window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (rank_window_0 <= 2) (type: boolean) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col5 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(_col1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Reducer 5 Reduce Operator Tree: @@ -522,16 +528,20 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col1 is not null (type: boolean) + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -563,21 +573,21 @@ Manufacturer#2 almond aquamarine midnight light salmon 2 Manufacturer#3 almond antique misty red olive 1 Manufacturer#4 almond aquamarine yellow dodger mint 7 Manufacturer#5 almond antique sky peru orange 2 -PREHOOK: query: explain -select * -from src b +PREHOOK: query: explain +select * +from src b where b.key in - (select distinct a.key - from src a + (select distinct a.key + from src a where b.value = a.value and a.key > '9' ) PREHOOK: type: QUERY -POSTHOOK: query: explain -select * -from src b +POSTHOOK: query: explain +select * +from src b where b.key in - (select distinct a.key - from src a + (select distinct a.key + from src a where b.value = a.value and a.key > '9' ) POSTHOOK: type: QUERY @@ -589,7 +599,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -597,37 +608,36 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > '9') and value is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Map 3 Map Operator Tree: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '9') and (value = value)) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) + predicate: ((key > '9') and value is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: string), value (type: string) + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -637,14 +647,31 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -652,21 +679,21 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select * -from src b +PREHOOK: query: select * +from src b where b.key in - (select distinct a.key - from src a + (select distinct a.key + from src a where b.value = a.value and a.key > '9' ) PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: select * -from src b +POSTHOOK: query: select * +from src b where b.key in - (select distinct a.key - from src a + (select distinct a.key + from src a where b.value = a.value and a.key > '9' ) POSTHOOK: type: QUERY @@ -683,37 +710,23 @@ POSTHOOK: Input: default@src 97 val_97 98 val_98 98 val_98 -PREHOOK: query: select p_mfgr, p_name, p_size -from part -where part.p_size in - (select first_value(p_size) over(partition by p_mfgr order by p_size) from part) -PREHOOK: type: QUERY -PREHOOK: Input: default@part -#### A masked pattern was here #### -POSTHOOK: query: select p_mfgr, p_name, p_size -from part -where part.p_size in - (select first_value(p_size) over(partition by p_mfgr order by p_size) from part) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@part -#### A masked pattern was here #### -Manufacturer#1 almond antique burnished rose metallic 2 -Manufacturer#1 almond antique burnished rose metallic 2 -Manufacturer#2 almond aquamarine midnight light salmon 2 -Manufacturer#3 almond antique misty red olive 1 -Manufacturer#4 almond aquamarine yellow dodger mint 7 -Manufacturer#5 almond antique sky peru orange 2 PREHOOK: query: explain -select p.p_partkey, li.l_suppkey -from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey -where li.l_linenumber = 1 and - li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR') +select * +from src b +where b.key in + (select distinct a.key + from src a + where b.value <> a.key and a.key > '9' + ) PREHOOK: type: QUERY POSTHOOK: query: explain -select p.p_partkey, li.l_suppkey -from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey -where li.l_linenumber = 1 and - li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR') +select * +from src b +where b.key in + (select distinct a.key + from src a + where b.value <> a.key and a.key > '9' + ) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -723,76 +736,238 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 2) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 6 (PARTITION-LEVEL SORT, 2), Reducer 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: lineitem - Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: l_partkey is not null (type: boolean) - Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: l_partkey (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE - Map 5 - Map Operator Tree: - TableScan - alias: li - Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((l_linenumber = 1) and l_orderkey is not null and l_partkey is not null) (type: boolean) - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: int) - Map 6 + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Map 3 Map Operator Tree: TableScan - alias: lineitem - Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((l_shipmode = 'AIR') and l_orderkey is not null) (type: boolean) - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: l_orderkey (type: int) + predicate: (key > '9') (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: string) + mode: hash outputColumnNames: _col0 - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + residual filter predicates: {(_col1 <> _col3)} + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * +from src b +where b.key in + (select distinct a.key + from src a + where b.value <> a.key and a.key > '9' + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * +from src b +where b.key in + (select distinct a.key + from src a + where b.value <> a.key and a.key > '9' + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +90 val_90 +90 val_90 +90 val_90 +92 val_92 +95 val_95 +95 val_95 +96 val_96 +97 val_97 +97 val_97 +98 val_98 +98 val_98 +PREHOOK: query: select p_mfgr, p_name, p_size +from part +where part.p_size in + (select first_value(p_size) over(partition by p_mfgr order by p_size) from part) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size +from part +where part.p_size in + (select first_value(p_size) over(partition by p_mfgr order by p_size) from part) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +Manufacturer#1 almond antique burnished rose metallic 2 +Manufacturer#1 almond antique burnished rose metallic 2 +Manufacturer#2 almond aquamarine midnight light salmon 2 +Manufacturer#3 almond antique misty red olive 1 +Manufacturer#4 almond aquamarine yellow dodger mint 7 +Manufacturer#5 almond antique sky peru orange 2 +PREHOOK: query: explain +select p.p_partkey, li.l_suppkey +from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey +where li.l_linenumber = 1 and + li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR') +PREHOOK: type: QUERY +POSTHOOK: query: explain +select p.p_partkey, li.l_suppkey +from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey +where li.l_linenumber = 1 and + li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR') +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 6 (PARTITION-LEVEL SORT, 2), Reducer 3 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: lineitem + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: l_partkey is not null (type: boolean) + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: l_partkey (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: li + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((l_linenumber = 1) and l_orderkey is not null and l_partkey is not null) (type: boolean) + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: int) + Map 6 + Map Operator Tree: + TableScan + alias: lineitem + Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((l_shipmode = 'AIR') and l_orderkey is not null) (type: boolean) + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: l_orderkey (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -906,7 +1081,7 @@ STAGE PLANS: alias: lineitem Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: l_partkey is not null (type: boolean) + predicate: (l_partkey is not null and l_quantity is not null) (type: boolean) Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_partkey (type: int), l_quantity (type: double), l_extendedprice (type: double) @@ -975,7 +1150,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col1 (type: double), _col3 (type: int) 1 _col0 (type: double), _col1 (type: int) @@ -1012,15 +1187,23 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: double), _col0 (type: int) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: _col1 is not null (type: boolean) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: double), _col1 (type: int) + Select Operator + expressions: _col1 (type: double), _col0 (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: double), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: double), _col1 (type: int) + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -1061,7 +1244,7 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((p_brand <> 'Brand#14') and (p_size <> 340)) (type: boolean) + predicate: ((p_brand <> 'Brand#14') and (p_size <> 340) and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) @@ -1079,7 +1262,7 @@ STAGE PLANS: alias: p Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_type is not null (type: boolean) + predicate: ((p_size <> 340) and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: p_type (type: string), p_size (type: int) @@ -1095,10 +1278,10 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col4 (type: string), _col5 (type: int) - 1 _col1 (type: string), _col0 (type: int) + 1 _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1115,14 +1298,15 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col0 (type: string) + Group By Operator + keys: _col0 (type: string), _col1 (type: int) + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) + key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 @@ -1436,9 +1620,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 5 <- Map 4 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -1446,41 +1629,48 @@ STAGE PLANS: TableScan alias: e Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_name (type: string), p_size (type: int) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: (p_name is not null and p_size is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), (_col1 + 100) (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), (_col1 + 100) (type: int) + Select Operator + expressions: p_name (type: string), p_size (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), (_col1 + 100) (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), (_col1 + 100) (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) + predicate: (p_name is not null and p_partkey is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: p_name (type: string), p_partkey (type: int) - mode: hash + Select Operator + expressions: p_name (type: string), p_partkey (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Group By Operator + keys: _col0 (type: string), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col0 (type: string), (_col1 + 100) (type: int) - 1 _col1 (type: string), _col0 (type: int) + 1 _col0 (type: string), _col1 (type: int) Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -1505,22 +1695,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -1713,39 +1887,44 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Filter Operator + predicate: (p_name is not null and p_size is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col5 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col5 (type: int) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Output Operator + key expressions: _col1 (type: string), _col5 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col5 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string) Map 3 Map Operator Tree: TableScan alias: p Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_size (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: int), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col2 (type: int), _col0 (type: int) + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col2 (type: int), _col0 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: int), _col0 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 5 Map Operator Tree: TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_size (type: int) - outputColumnNames: p_size + Filter Operator + predicate: p_size is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: p_size (type: int) @@ -1839,8 +2018,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 3 (GROUP, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -1848,42 +2026,49 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Filter Operator + predicate: (p_name is not null and p_partkey is not null and p_size is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int) - sort order: +++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col5 (type: int) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col5 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string) Map 3 Map Operator Tree: TableScan alias: p Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((p_partkey = p_partkey) and (p_size = p_size)) (type: boolean) - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: p_partkey (type: int), p_name (type: string), p_size (type: int) - mode: hash + predicate: (p_name is not null and p_partkey is not null and p_size is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_size (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) - sort order: +++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: int) - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: string), _col2 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col0 (type: int), _col1 (type: string), _col5 (type: int) - 1 _col1 (type: int), _col0 (type: string), _col2 (type: int) + 1 _col0 (type: int), _col1 (type: string), _col2 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1893,22 +2078,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int), KEY._col1 (type: string), KEY._col2 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 363 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col0 (type: int), _col2 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 363 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int), _col0 (type: string), _col2 (type: int) - sort order: +++ - Map-reduce partition columns: _col1 (type: int), _col0 (type: string), _col2 (type: int) - Statistics: Num rows: 3 Data size: 363 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -1962,8 +2131,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 3 (GROUP, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -1971,39 +2139,46 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_name (type: string), p_type (type: string), p_brand (type: string) - outputColumnNames: _col0, _col1, _col2 + Filter Operator + predicate: (p_brand is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col2 (type: string) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col2 (type: string) + Select Operator + expressions: p_name (type: string), p_type (type: string), p_brand (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Map 3 Map Operator Tree: TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_brand is not null (type: boolean) + predicate: (p_brand is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: p_type (type: string), p_brand (type: string) - mode: hash + Select Operator + expressions: p_type (type: string), p_brand (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col1 (type: string), _col2 (type: string) 1 _col0 (type: string), _col1 (type: string) @@ -2016,18 +2191,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -2091,31 +2254,37 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_name (type: string), p_type (type: string), (p_size + 1) (type: int) - outputColumnNames: _col0, _col1, _col2 + Filter Operator + predicate: (p_size is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col2 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col2 (type: int) + Select Operator + expressions: p_name (type: string), p_type (type: string), (p_size + 1) (type: int) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Map 3 Map Operator Tree: TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_type (type: string), p_size (type: int) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: p_type is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: (_col1 + 1) (type: int) - sort order: + - Map-reduce partition columns: (_col1 + 1) (type: int) + Select Operator + expressions: p_type (type: string), p_size (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Reduce Output Operator + key expressions: (_col1 + 1) (type: int) + sort order: + + Map-reduce partition columns: (_col1 + 1) (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Map 5 Map Operator Tree: TableScan @@ -2125,16 +2294,19 @@ STAGE PLANS: expressions: (p_size + 1) (type: int) outputColumnNames: _col0 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 + Filter Operator + predicate: _col0 is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -2452,11 +2624,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 2) - Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2), Reducer 8 (PARTITION-LEVEL SORT, 2) - Reducer 6 <- Map 5 (GROUP, 2) - Reducer 8 <- Map 7 (GROUP, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Map 6 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -2464,34 +2635,41 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map 5 Map Operator Tree: TableScan alias: sc Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) + predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: key (type: string), value (type: string) - mode: hash + Select Operator + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 7 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map 6 Map Operator Tree: TableScan alias: s1 @@ -2517,10 +2695,10 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col0 (type: string), _col1 (type: string) - 1 _col1 (type: string), _col0 (type: string) + 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -2569,23 +2747,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: string) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reducer 8 + Reducer 7 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -3086,38 +3248,41 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) - outputColumnNames: _col0, _col1, _col2 + Filter Operator + predicate: (p_mfgr is not null and p_name is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: string) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: string) + Select Operator + expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int) + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int) Map 4 Map Operator Tree: TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((p_mfgr = p_mfgr) and (p_size < 10)) (type: boolean) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + predicate: ((p_size < 10) and p_mfgr is not null and p_name is not null) (type: boolean) + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_mfgr (type: string), p_name (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -3196,23 +3361,26 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_size (type: int) - outputColumnNames: _col0, _col1, _col2 + Filter Operator + predicate: (p_name is not null and p_size is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col2 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col2 (type: int) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Map 3 Map Operator Tree: TableScan alias: p Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_type is not null (type: boolean) + predicate: (p_name is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_name (type: string), p_type (type: string) @@ -3347,23 +3515,26 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_size (type: int) - outputColumnNames: _col0, _col1, _col2 + Filter Operator + predicate: (p_name is not null and p_size is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col2 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col2 (type: int) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Map 3 Map Operator Tree: TableScan alias: p Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_size is not null and p_type is not null) (type: boolean) + predicate: (p_name is not null and p_size is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_name (type: string), p_type (type: string) @@ -3500,23 +3671,26 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_type (type: string), p_size (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 + Filter Operator + predicate: (p_name is not null and p_size is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col2 (type: string), _col3 (type: int) - sort order: +++ - Map-reduce partition columns: _col1 (type: string), _col2 (type: string), _col3 (type: int) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_type (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: string), _col3 (type: int) + sort order: +++ + Map-reduce partition columns: _col1 (type: string), _col2 (type: string), _col3 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Map 3 Map Operator Tree: TableScan alias: p Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_type is not null (type: boolean) + predicate: (p_name is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_name (type: string), p_type (type: string) @@ -3617,23 +3791,26 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_size (type: int) - outputColumnNames: _col0, _col1, _col2 + Filter Operator + predicate: (p_name is not null and p_size is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col2 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col2 (type: int) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_size (type: int) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) + Reduce Output Operator + key expressions: _col1 (type: string), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col2 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Map 3 Map Operator Tree: TableScan alias: p Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_type is not null (type: boolean) + predicate: (p_name is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_name (type: string), p_type (type: string) @@ -3770,16 +3947,19 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Filter Operator + predicate: (p_size is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col4 (type: string) - sort order: + - Map-reduce partition columns: _col4 (type: string) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Output Operator + key expressions: _col4 (type: string) + sort order: + + Map-reduce partition columns: _col4 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Map 4 Map Operator Tree: TableScan @@ -3845,10 +4025,10 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col4 (type: string), UDFToLong(_col5) (type: bigint) - 1 _col1 (type: string), _col0 (type: bigint) + 1 _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -3885,15 +4065,19 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: bigint), _col0 (type: string) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: _col1 is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: bigint) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: bigint) + Group By Operator + keys: _col0 (type: string), _col1 (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: bigint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -3933,16 +4117,19 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Filter Operator + predicate: (p_partkey is not null and p_size is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), UDFToDouble(_col5) (type: double) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), UDFToDouble(_col5) (type: double) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Output Operator + key expressions: _col0 (type: int), UDFToDouble(_col5) (type: double) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), UDFToDouble(_col5) (type: double) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Map 3 Map Operator Tree: TableScan @@ -3967,10 +4154,10 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col0 (type: int), UDFToDouble(_col5) (type: double) - 1 _col1 (type: int), _col0 (type: double) + 1 _col0 (type: int), _col1 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -3988,15 +4175,19 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: double), _col0 (type: int) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: _col1 is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int), _col0 (type: double) - sort order: ++ - Map-reduce partition columns: _col1 (type: int), _col0 (type: double) + Group By Operator + keys: _col0 (type: int), _col1 (type: double) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: double) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: double) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -4038,7 +4229,7 @@ POSTHOOK: Input: default@part 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -Warning: Shuffle Join JOIN[12][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 4' is a cross product +Warning: Shuffle Join JOIN[14][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 4' is a cross product PREHOOK: query: explain select * from part where p_size in (select min(pp.p_size) from part pp where pp.p_partkey > part.p_partkey) PREHOOK: type: QUERY POSTHOOK: query: explain select * from part where p_size in (select min(pp.p_size) from part pp where pp.p_partkey > part.p_partkey) @@ -4062,16 +4253,19 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Filter Operator + predicate: (p_partkey is not null and p_size is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col5 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col5 (type: int) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Output Operator + key expressions: _col0 (type: int), _col5 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col5 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string) Map 3 Map Operator Tree: TableScan @@ -4090,9 +4284,8 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int) - outputColumnNames: p_partkey + Filter Operator + predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: p_partkey (type: int) @@ -4108,10 +4301,10 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col0 (type: int), _col5 (type: int) - 1 _col1 (type: int), _col0 (type: int) + 1 _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 61 Data size: 14971 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -4158,15 +4351,19 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 56 Data size: 13610 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col0 (type: int) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: _col1 is not null (type: boolean) Statistics: Num rows: 56 Data size: 13610 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: int), _col0 (type: int) + Group By Operator + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 56 Data size: 13610 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 56 Data size: 13610 Basic stats: COMPLETE Column stats: NONE Reducer 7 Reduce Operator Tree: Group By Operator @@ -4185,7 +4382,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[12][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 4' is a cross product +Warning: Shuffle Join JOIN[14][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 4' is a cross product PREHOOK: query: select * from part where p_size in (select min(pp.p_size) from part pp where pp.p_partkey > part.p_partkey) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -4902,16 +5099,19 @@ STAGE PLANS: TableScan alias: t Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i (type: int), j (type: int) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: (i is not null and j is not null) (type: boolean) Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToLong(_col0) (type: bigint), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: UDFToLong(_col0) (type: bigint), _col1 (type: int) + Select Operator + expressions: i (type: int), j (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) + Reduce Output Operator + key expressions: UDFToLong(_col0) (type: bigint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: UDFToLong(_col0) (type: bigint), _col1 (type: int) + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Map 3 Map Operator Tree: TableScan @@ -4936,7 +5136,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 UDFToLong(_col0) (type: bigint), _col1 (type: int) 1 _col0 (type: bigint), _col1 (type: int) @@ -4957,15 +5157,23 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: bigint), _col0 (type: int) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: _col1 is not null (type: boolean) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: bigint), _col1 (type: int) + Select Operator + expressions: _col1 (type: bigint), _col0 (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: bigint), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: bigint), _col1 (type: int) + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -4999,3 +5207,368 @@ POSTHOOK: query: drop table tt POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@tt POSTHOOK: Output: default@tt +Warning: Shuffle Join JOIN[14][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 4' is a cross product +PREHOOK: query: explain select * from part where p_size IN (select max(p_size) from part p where p.p_type <> part.p_name) +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from part where p_size IN (select max(p_size) from part p where p.p_type <> part.p_name) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Reducer 7 (PARTITION-LEVEL SORT, 1) + Reducer 5 <- Reducer 4 (GROUP, 2) + Reducer 7 <- Map 6 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (p_name is not null and p_size is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string), _col5 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col5 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: p + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_type (type: string), p_size (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: int) + Map 6 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: p_name (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col1 (type: string), _col5 (type: int) + 1 _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 185 Data size: 45180 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 185 Data size: 45180 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 338 Data size: 82147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 <> _col2) (type: boolean) + Statistics: Num rows: 338 Data size: 82147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col2 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 338 Data size: 82147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(_col1) + keys: _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 338 Data size: 82147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 338 Data size: 82147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 169 Data size: 41073 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col1 is not null (type: boolean) + Statistics: Num rows: 169 Data size: 41073 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 169 Data size: 41073 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 169 Data size: 41073 Basic stats: COMPLETE Column stats: NONE + Reducer 7 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[14][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 4' is a cross product +PREHOOK: query: select * from part where p_size IN (select max(p_size) from part p where p.p_type <> part.p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select * from part where p_size IN (select max(p_size) from part p where p.p_type <> part.p_name) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu +Warning: Shuffle Join JOIN[18][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 4' is a cross product +PREHOOK: query: explain select * from part where p_size IN (select pp.p_size from part p join part pp on pp.p_type = p.p_type where part.p_type <> p.p_name) +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from part where p_size IN (select pp.p_size from part p join part pp on pp.p_type = p.p_type where part.p_type <> p.p_name) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Reducer 7 (PARTITION-LEVEL SORT, 1) + Reducer 5 <- Map 8 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Map 6 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (p_size is not null and p_type is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col4 (type: string), _col5 (type: int) + sort order: ++ + Map-reduce partition columns: _col4 (type: string), _col5 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: p + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_name (type: string), p_type (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_type is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: p_type (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Map 8 + Map Operator Tree: + TableScan + alias: pp + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (p_size is not null and p_type is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_type (type: string), p_size (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col4 (type: string), _col5 (type: int) + 1 _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 408 Data size: 99397 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 408 Data size: 99397 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 338 Data size: 82147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col2 <> _col0) (type: boolean) + Statistics: Num rows: 338 Data size: 82147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 338 Data size: 82147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col4 + Statistics: Num rows: 371 Data size: 90361 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col4 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 371 Data size: 90361 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 371 Data size: 90361 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 371 Data size: 90361 Basic stats: COMPLETE Column stats: NONE + Reducer 7 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[18][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 4' is a cross product +PREHOOK: query: select * from part where p_size IN (select pp.p_size from part p join part pp on pp.p_type = p.p_type where part.p_type <> p.p_name) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select * from part where p_size IN (select pp.p_size from part p join part pp on pp.p_type = p.p_type where part.p_type <> p.p_name) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ +110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously +112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h +132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even +144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about +146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref +15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu +155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra +17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the +17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve +191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle +192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir +195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de +33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful +40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s +42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl +45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful +48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i +49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick +65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr +78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith +85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull +86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully +90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl diff --git a/ql/src/test/results/clientpositive/spark/subquery_multi.q.out b/ql/src/test/results/clientpositive/spark/subquery_multi.q.out index ff519fda09..b91c33ee4a 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_multi.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_multi.q.out @@ -1607,14 +1607,13 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 12 <- Map 11 (PARTITION-LEVEL SORT, 2), Map 14 (PARTITION-LEVEL SORT, 2) - Reducer 13 <- Reducer 12 (GROUP, 2) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 9 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Reducer 13 (PARTITION-LEVEL SORT, 2), Reducer 3 (PARTITION-LEVEL SORT, 2) - Reducer 6 <- Map 5 (GROUP, 2) - Reducer 8 <- Map 10 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) - Reducer 9 <- Reducer 8 (GROUP, 2) + Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 2), Map 13 (PARTITION-LEVEL SORT, 2) + Reducer 12 <- Reducer 11 (GROUP, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 8 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 12 (PARTITION-LEVEL SORT, 2), Reducer 3 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 2), Map 9 (PARTITION-LEVEL SORT, 2) + Reducer 8 <- Reducer 7 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -1622,45 +1621,26 @@ STAGE PLANS: TableScan alias: part_null Statistics: Num rows: 1 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Filter Operator + predicate: (p_name is not null and p_type is not null) (type: boolean) Statistics: Num rows: 1 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col4 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col4 (type: string), _col1 (type: string) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Output Operator + key expressions: _col4 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col4 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Map 10 Map Operator Tree: TableScan - alias: pp - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_type is not null (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_type (type: string), p_brand (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Map 11 - Map Operator Tree: - TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_type is not null (type: boolean) + predicate: (p_brand is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_brand (type: string), p_type (type: string), p_container (type: string) @@ -1672,13 +1652,13 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string), _col0 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: string) - Map 14 + Map 13 Map Operator Tree: TableScan alias: pp Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_type is not null (type: boolean) + predicate: (p_brand is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_type (type: string), p_brand (type: string) @@ -1700,25 +1680,29 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_type is not null (type: boolean) + predicate: (p_name is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: p_type (type: string), p_name (type: string) - mode: hash + Select Operator + expressions: p_type (type: string), p_name (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Map 7 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Map 6 Map Operator Tree: TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_type is not null (type: boolean) + predicate: (p_brand is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_brand (type: string), p_type (type: string), p_container (type: string) @@ -1730,7 +1714,29 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string), _col0 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: string) - Reducer 12 + Map 9 + Map Operator Tree: + TableScan + alias: pp + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (p_brand is not null and p_type is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_type (type: string), p_brand (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reducer 11 Reduce Operator Tree: Join Operator condition map: @@ -1750,7 +1756,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Reducer 13 + Reducer 12 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) @@ -1774,17 +1780,17 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col4 (type: string), _col1 (type: string) - 1 _col1 (type: string), _col0 (type: string) + 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col4 (type: string) sort order: + Map-reduce partition columns: _col4 (type: string) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reducer 3 Reduce Operator Tree: @@ -1794,14 +1800,14 @@ STAGE PLANS: keys: 0 _col4 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11 + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col3 (type: string), _col4 (type: string) sort order: ++ Map-reduce partition columns: _col3 (type: string), _col4 (type: string) - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: bigint), _col13 (type: bigint) + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col10 (type: bigint), _col11 (type: bigint) Reducer 4 Reduce Operator Tree: Join Operator @@ -1810,39 +1816,23 @@ STAGE PLANS: keys: 0 _col3 (type: string), _col4 (type: string) 1 _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col16 - Statistics: Num rows: 16 Data size: 2093 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11, _col14 + Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (not CASE WHEN ((_col12 = 0)) THEN (false) WHEN (_col12 is null) THEN (false) WHEN (_col16 is not null) THEN (true) WHEN (_col3 is null) THEN (null) WHEN ((_col13 < _col12)) THEN (true) ELSE (false) END) (type: boolean) - Statistics: Num rows: 8 Data size: 1046 Basic stats: COMPLETE Column stats: NONE + predicate: (not CASE WHEN ((_col10 = 0)) THEN (false) WHEN (_col10 is null) THEN (false) WHEN (_col14 is not null) THEN (true) WHEN (_col3 is null) THEN (null) WHEN ((_col11 < _col10)) THEN (true) ELSE (false) END) (type: boolean) + Statistics: Num rows: 17 Data size: 2156 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 8 Data size: 1046 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 2156 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 1046 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 2156 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: string) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reducer 8 + Reducer 7 Reduce Operator Tree: Join Operator condition map: @@ -1864,7 +1854,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint) - Reducer 9 + Reducer 8 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), count(VALUE._col1) @@ -1948,7 +1938,7 @@ STAGE PLANS: alias: part_null Statistics: Num rows: 1 Data size: 3256 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_name is not null (type: boolean) + predicate: (p_brand is not null and p_name is not null and p_type is not null) (type: boolean) Statistics: Num rows: 1 Data size: 3256 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) @@ -1984,7 +1974,7 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_type is not null (type: boolean) + predicate: (p_brand is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_brand (type: string), p_type (type: string) @@ -2100,14 +2090,13 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 12 <- Map 11 (PARTITION-LEVEL SORT, 2), Map 14 (PARTITION-LEVEL SORT, 2) - Reducer 13 <- Reducer 12 (GROUP, 2) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 9 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Reducer 13 (PARTITION-LEVEL SORT, 2), Reducer 3 (PARTITION-LEVEL SORT, 2) - Reducer 6 <- Map 5 (GROUP, 2) - Reducer 8 <- Map 10 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) - Reducer 9 <- Reducer 8 (GROUP, 2) + Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 2), Map 13 (PARTITION-LEVEL SORT, 2) + Reducer 12 <- Reducer 11 (GROUP, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 8 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 12 (PARTITION-LEVEL SORT, 2), Reducer 3 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 2), Map 9 (PARTITION-LEVEL SORT, 2) + Reducer 8 <- Reducer 7 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -2115,45 +2104,26 @@ STAGE PLANS: TableScan alias: part_null Statistics: Num rows: 1 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Filter Operator + predicate: (p_container is not null and p_name is not null and p_type is not null) (type: boolean) Statistics: Num rows: 1 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col4 (type: string), _col1 (type: string), _col6 (type: string) - sort order: +++ - Map-reduce partition columns: _col4 (type: string), _col1 (type: string), _col6 (type: string) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col7 (type: double), _col8 (type: string) + Reduce Output Operator + key expressions: _col4 (type: string), _col1 (type: string), _col6 (type: string) + sort order: +++ + Map-reduce partition columns: _col4 (type: string), _col1 (type: string), _col6 (type: string) + Statistics: Num rows: 1 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col7 (type: double), _col8 (type: string) Map 10 Map Operator Tree: TableScan - alias: pp - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_type is not null (type: boolean) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_type (type: string), p_brand (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Map 11 - Map Operator Tree: - TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_type is not null (type: boolean) + predicate: (p_brand is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_brand (type: string), p_type (type: string), p_container (type: string) @@ -2165,13 +2135,13 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string), _col0 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: string) - Map 14 + Map 13 Map Operator Tree: TableScan alias: pp Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_type is not null (type: boolean) + predicate: (p_brand is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_type (type: string), p_brand (type: string) @@ -2193,25 +2163,29 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((p_container = p_container) and (p_type = p_type)) (type: boolean) - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: p_type (type: string), p_name (type: string), p_container (type: string) - mode: hash + predicate: (p_container is not null and p_name is not null and p_type is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_type (type: string), p_name (type: string), p_container (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE - Map 7 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Map 6 Map Operator Tree: TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_type is not null (type: boolean) + predicate: (p_brand is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_brand (type: string), p_type (type: string), p_container (type: string) @@ -2223,7 +2197,29 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string), _col0 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: string) - Reducer 12 + Map 9 + Map Operator Tree: + TableScan + alias: pp + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (p_brand is not null and p_type is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_type (type: string), p_brand (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reducer 11 Reduce Operator Tree: Join Operator condition map: @@ -2234,7 +2230,7 @@ STAGE PLANS: outputColumnNames: _col1, _col2 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col1 (type: string), _col2 (type: string) + keys: _col2 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE @@ -2243,45 +2239,41 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Reducer 13 + Reducer 12 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col0 (type: string) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: _col0 is not null (type: boolean) Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) + Select Operator + expressions: _col0 (type: string), _col1 (type: string), true (type: boolean) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), true (type: boolean) - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: string) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: string) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: boolean) + value expressions: _col2 (type: boolean) Reducer 2 Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col4 (type: string), _col1 (type: string), _col6 (type: string) - 1 _col1 (type: string), _col0 (type: string), _col2 (type: string) + 1 _col0 (type: string), _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 3 Data size: 399 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col4 (type: string) sort order: + Map-reduce partition columns: _col4 (type: string) - Statistics: Num rows: 3 Data size: 399 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reducer 3 Reduce Operator Tree: @@ -2291,55 +2283,39 @@ STAGE PLANS: keys: 0 _col4 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11 + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col4 (type: string), _col3 (type: string) + key expressions: _col3 (type: string), _col4 (type: string) sort order: ++ - Map-reduce partition columns: _col4 (type: string), _col3 (type: string) - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col13 (type: bigint), _col14 (type: bigint) + Map-reduce partition columns: _col3 (type: string), _col4 (type: string) + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col10 (type: bigint), _col11 (type: bigint) Reducer 4 Reduce Operator Tree: Join Operator condition map: Left Outer Join 0 to 1 keys: - 0 _col4 (type: string), _col3 (type: string) - 1 _col1 (type: string), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col17 - Statistics: Num rows: 16 Data size: 2093 Basic stats: COMPLETE Column stats: NONE + 0 _col3 (type: string), _col4 (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11, _col14 + Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (not CASE WHEN ((_col13 = 0)) THEN (false) WHEN (_col13 is null) THEN (false) WHEN (_col17 is not null) THEN (true) WHEN (_col3 is null) THEN (null) WHEN ((_col14 < _col13)) THEN (true) ELSE (false) END) (type: boolean) - Statistics: Num rows: 8 Data size: 1046 Basic stats: COMPLETE Column stats: NONE + predicate: (not CASE WHEN ((_col10 = 0)) THEN (false) WHEN (_col10 is null) THEN (false) WHEN (_col14 is not null) THEN (true) WHEN (_col3 is null) THEN (null) WHEN ((_col11 < _col10)) THEN (true) ELSE (false) END) (type: boolean) + Statistics: Num rows: 17 Data size: 2156 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 8 Data size: 1046 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 2156 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 1046 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 2156 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 363 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col0 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 363 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: string), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: string), _col2 (type: string) - Statistics: Num rows: 3 Data size: 363 Basic stats: COMPLETE Column stats: NONE - Reducer 8 + Reducer 7 Reduce Operator Tree: Join Operator condition map: @@ -2361,7 +2337,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint) - Reducer 9 + Reducer 8 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), count(VALUE._col1) @@ -2392,32 +2368,32 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@part POSTHOOK: Input: default@part_null #### A masked pattern was here #### -40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s +86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully +110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously 191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22 ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle -78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith -105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ -112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car -192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir +132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even +90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl +40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY PLATED COPPER 1 LG PKG 1922.98 c foxes can s 17273 almond antique forest lavender goldenrod Manufacturer#3 Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along the -110592 almond antique salmon chartreuse burlywood Manufacturer#1 Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the furiously -48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i -33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful -42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl 17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41 ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve +33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41 STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful +48427 almond antique violet mint lemon Manufacturer#4 Brand#42 PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i +45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful +78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52 LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith +192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir 15103 almond aquamarine dodger light gainsboro Manufacturer#5 Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder carefu 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12 LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr -146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref -90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl 121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h 121152 almond antique burnished rose metallic Manufacturer#1 Brand#14 PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h -49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick -155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra -45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42 SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful -132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24 SMALL POLISHED NICKEL 25 MED BOX 1698.66 even -86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully +105685 almond antique violet chocolate turquoise Manufacturer#2 Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ +146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23 MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref 195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25 STANDARD PLATED TIN 18 SM PKG 1701.6 ic de +112398 almond antique metallic orange dim Manufacturer#3 Brand#32 MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car 144293 almond antique olive coral navajo Manufacturer#3 Brand#34 STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about +49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41 SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick +42669 almond antique medium spring khaki Manufacturer#5 Brand#51 STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl +155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra PREHOOK: query: explain select * from part_null where p_name IN (select p_name from part where part.p_type = part_null.p_type) AND p_brand NOT IN (select p_type from part where part.p_size = part_null.p_size) PREHOOK: type: QUERY POSTHOOK: query: explain select * from part_null where p_name IN (select p_name from part where part.p_type = part_null.p_type) AND p_brand NOT IN (select p_type from part where part.p_size = part_null.p_size) @@ -2430,12 +2406,11 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 9 (GROUP, 2) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 8 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Reducer 10 (PARTITION-LEVEL SORT, 2), Reducer 3 (PARTITION-LEVEL SORT, 2) - Reducer 6 <- Map 5 (GROUP, 2) - Reducer 8 <- Map 7 (GROUP, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2), Reducer 9 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Map 6 (GROUP, 2) + Reducer 9 <- Map 8 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -2443,35 +2418,42 @@ STAGE PLANS: TableScan alias: part_null Statistics: Num rows: 1 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Filter Operator + predicate: (p_name is not null and p_type is not null) (type: boolean) Statistics: Num rows: 1 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col4 (type: string) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col4 (type: string) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Output Operator + key expressions: _col1 (type: string), _col4 (type: string) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col4 (type: string) + Statistics: Num rows: 1 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Map 5 Map Operator Tree: TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_type is not null (type: boolean) + predicate: (p_name is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: p_name (type: string), p_type (type: string) - mode: hash + Select Operator + expressions: p_name (type: string), p_type (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Map 7 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Map 6 Map Operator Tree: TableScan alias: part @@ -2491,7 +2473,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint) - Map 9 + Map 8 Map Operator Tree: TableScan alias: part @@ -2509,41 +2491,21 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reducer 10 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: int), true (type: boolean) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: boolean) Reducer 2 Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col1 (type: string), _col4 (type: string) 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col5 (type: int) sort order: + Map-reduce partition columns: _col5 (type: int) - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reducer 3 Reduce Operator Tree: @@ -2553,14 +2515,14 @@ STAGE PLANS: keys: 0 _col5 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11 + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col3 (type: string), _col5 (type: int) sort order: ++ Map-reduce partition columns: _col3 (type: string), _col5 (type: int) - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: bigint), _col13 (type: bigint) + Statistics: Num rows: 30 Data size: 3807 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col10 (type: bigint), _col11 (type: bigint) Reducer 4 Reduce Operator Tree: Join Operator @@ -2569,35 +2531,23 @@ STAGE PLANS: keys: 0 _col3 (type: string), _col5 (type: int) 1 _col0 (type: string), _col1 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col16 - Statistics: Num rows: 16 Data size: 2093 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11, _col14 + Statistics: Num rows: 33 Data size: 4187 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (not CASE WHEN ((_col12 = 0)) THEN (false) WHEN (_col12 is null) THEN (false) WHEN (_col16 is not null) THEN (true) WHEN (_col3 is null) THEN (null) WHEN ((_col13 < _col12)) THEN (true) ELSE (false) END) (type: boolean) - Statistics: Num rows: 8 Data size: 1046 Basic stats: COMPLETE Column stats: NONE + predicate: (not CASE WHEN ((_col10 = 0)) THEN (false) WHEN (_col10 is null) THEN (false) WHEN (_col14 is not null) THEN (true) WHEN (_col3 is null) THEN (null) WHEN ((_col11 < _col10)) THEN (true) ELSE (false) END) (type: boolean) + Statistics: Num rows: 17 Data size: 2156 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 8 Data size: 1046 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 2156 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 1046 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 2156 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reducer 8 + Reducer 7 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), count(VALUE._col1) @@ -2611,6 +2561,26 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 9 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), true (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: boolean) Stage: Stage-0 Fetch Operator @@ -2667,8 +2637,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) - Reducer 6 <- Map 5 (GROUP, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -2676,23 +2645,26 @@ STAGE PLANS: TableScan alias: part_null Statistics: Num rows: 1 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Filter Operator + predicate: (p_name is not null and p_type is not null) (type: boolean) Statistics: Num rows: 1 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col4 (type: string) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col4 (type: string) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Output Operator + key expressions: _col1 (type: string), _col4 (type: string) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col4 (type: string) + Statistics: Num rows: 1 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Map 3 Map Operator Tree: TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_type is not null (type: boolean) + predicate: (p_brand is not null and p_name is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_name (type: string), p_brand (type: string), p_type (type: string) @@ -2710,18 +2682,22 @@ STAGE PLANS: alias: pp Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_type is not null (type: boolean) + predicate: (p_brand is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: p_type (type: string), p_brand (type: string) - mode: hash + Select Operator + expressions: p_type (type: string), p_brand (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -2743,10 +2719,10 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col2 (type: string), _col1 (type: string) - 1 _col1 (type: string), _col0 (type: string) + 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col2 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -2763,22 +2739,6 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Reducer 6 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: string) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -2892,7 +2852,7 @@ STAGE PLANS: alias: li Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((l_linenumber = 1) and l_partkey is not null) (type: boolean) + predicate: ((l_linenumber = 1) and l_orderkey is not null and l_partkey is not null) (type: boolean) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int), 1 (type: int) @@ -2910,16 +2870,16 @@ STAGE PLANS: alias: lineitem Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((l_linenumber = l_linenumber) and (l_shipmode = 'AIR')) (type: boolean) + predicate: ((l_linenumber = 1) and (l_shipmode = 'AIR') and l_orderkey is not null) (type: boolean) Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: l_orderkey (type: int), l_linenumber (type: int), l_quantity (type: double) - outputColumnNames: _col0, _col1, _col2 + expressions: l_orderkey (type: int), l_quantity (type: double) + outputColumnNames: _col0, _col2 Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: double) + value expressions: _col0 (type: int), _col2 (type: double) Reducer 10 Reduce Operator Tree: Group By Operator @@ -3010,14 +2970,14 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0, _col1, _col2, _col4, _col5 + outputColumnNames: _col0, _col2, _col4, _col5 Statistics: Num rows: 25 Data size: 5224 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: double) sort order: + Map-reduce partition columns: _col2 (type: double) Statistics: Num rows: 25 Data size: 5224 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col4 (type: bigint), _col5 (type: bigint) + value expressions: _col0 (type: int), _col4 (type: bigint), _col5 (type: bigint) Reducer 8 Reduce Operator Tree: Join Operator @@ -3026,25 +2986,25 @@ STAGE PLANS: keys: 0 _col2 (type: double) 1 _col0 (type: double) - outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col7 + outputColumnNames: _col0, _col2, _col4, _col5, _col7 Statistics: Num rows: 27 Data size: 5746 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (not CASE WHEN ((_col4 = 0)) THEN (false) WHEN (_col7 is not null) THEN (true) WHEN (_col2 is null) THEN (null) WHEN ((_col5 < _col4)) THEN (true) ELSE (false) END) (type: boolean) - Statistics: Num rows: 14 Data size: 2979 Basic stats: COMPLETE Column stats: NONE + predicate: ((_col4 = 0) or (_col7 is null and _col2 is not null and (_col5 >= _col4))) (type: boolean) + Statistics: Num rows: 17 Data size: 3617 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: int) + expressions: _col0 (type: int), 1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 2979 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 3617 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 2979 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 3617 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 14 Data size: 2979 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 3617 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -3103,22 +3063,25 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map 5 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) + predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -3140,7 +3103,7 @@ STAGE PLANS: alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key > '9') (type: boolean) + predicate: ((key > '9') and value is not null) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -3603,7 +3566,7 @@ POSTHOOK: Input: default@src 409 val_409 3 167 val_167 3 119 val_119 3 -Warning: Shuffle Join JOIN[13][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 4' is a cross product +Warning: Shuffle Join JOIN[15][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 4' is a cross product PREHOOK: query: explain select * from part where p_name IN (select p_name from part p where part.p_type <> '1') PREHOOK: type: QUERY POSTHOOK: query: explain select * from part where p_name IN (select p_name from part p where part.p_type <> '1') @@ -3626,29 +3589,35 @@ STAGE PLANS: TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Filter Operator + predicate: ((p_type <> '1') and p_name is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col4 (type: string) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col4 (type: string) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Output Operator + key expressions: _col1 (type: string), _col4 (type: string) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col4 (type: string) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Map 3 Map Operator Tree: TableScan alias: p Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_name (type: string) - outputColumnNames: _col0 + Filter Operator + predicate: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Select Operator + expressions: p_name (type: string) + outputColumnNames: _col0 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Reduce Output Operator + sort order: + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Map 5 Map Operator Tree: TableScan @@ -3722,7 +3691,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[13][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 4' is a cross product +Warning: Shuffle Join JOIN[15][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 4' is a cross product PREHOOK: query: select * from part where p_name IN (select p_name from part p where part.p_type <> '1') PREHOOK: type: QUERY PREHOOK: Input: default@part diff --git a/ql/src/test/results/clientpositive/spark/subquery_notin.q.out b/ql/src/test/results/clientpositive/spark/subquery_notin.q.out index 1b2c0880ae..0d12d0db60 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_notin.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_notin.q.out @@ -341,13 +341,13 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_mfgr = p_mfgr) (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + predicate: p_mfgr is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: p_mfgr (type: string), p_size (type: int) sort order: ++ Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: p_name (type: string) Reducer 2 @@ -395,7 +395,7 @@ STAGE PLANS: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition @@ -416,25 +416,25 @@ STAGE PLANS: window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (rank_window_0 <= 2) (type: boolean) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(), count(_col0) keys: _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 6 Reduce Operator Tree: @@ -443,19 +443,19 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 8 Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition @@ -476,43 +476,43 @@ STAGE PLANS: window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (rank_window_0 <= 2) (type: boolean) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Reducer 9 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), true (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: boolean) Stage: Stage-0 @@ -887,13 +887,13 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_mfgr = p_mfgr) (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + predicate: p_mfgr is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: p_mfgr (type: string), p_size (type: int) sort order: ++ Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Reducer 10 Reduce Operator Tree: @@ -902,25 +902,25 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(), count(_col1) keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 121 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 121 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 12 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) outputColumnNames: _col2, _col5 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition @@ -941,25 +941,25 @@ STAGE PLANS: window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (rank_window_0 <= 2) (type: boolean) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col5 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(_col1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Reducer 13 Reduce Operator Tree: @@ -968,23 +968,23 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: int), _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), true (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string), _col0 (type: int) sort order: ++ Map-reduce partition columns: _col1 (type: string), _col0 (type: int) - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: boolean) Reducer 2 Reduce Operator Tree: @@ -1054,7 +1054,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) outputColumnNames: _col2, _col5 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition @@ -1075,49 +1075,49 @@ STAGE PLANS: window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (rank_window_0 <= 2) (type: boolean) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Reducer 7 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 121 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 121 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reducer 9 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) outputColumnNames: _col2, _col5 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition @@ -1138,25 +1138,25 @@ STAGE PLANS: window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (rank_window_0 <= 2) (type: boolean) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: string), _col5 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(_col1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Stage: Stage-0 @@ -2901,19 +2901,19 @@ STAGE PLANS: alias: p Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((p_partkey = p_partkey) and (p_size = p_size)) (type: boolean) - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + predicate: (p_partkey is not null and p_size is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(), count(p_name) keys: p_partkey (type: int), p_size (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint), _col3 (type: bigint) Map 6 Map Operator Tree: @@ -2921,18 +2921,18 @@ STAGE PLANS: alias: p Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((p_partkey = p_partkey) and (p_size = p_size)) (type: boolean) - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + predicate: (p_partkey is not null and p_size is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: p_partkey (type: int), p_name (type: string), p_size (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: int), _col1 (type: string), _col2 (type: int) - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -2980,12 +2980,12 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 363 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 3 Data size: 363 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint), _col3 (type: bigint) Reducer 7 Reduce Operator Tree: @@ -2993,23 +2993,23 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 363 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string), _col0 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 363 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 3 Data size: 363 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), true (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 363 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int), _col0 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col1 (type: int), _col0 (type: string), _col2 (type: int) - Statistics: Num rows: 3 Data size: 363 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: boolean) Stage: Stage-0 diff --git a/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out b/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out index de005ada82..8f3ac0d636 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_scalar.q.out @@ -1984,7 +1984,7 @@ POSTHOOK: Input: default@part_null 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -Warning: Shuffle Join JOIN[18][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product +Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: explain select * from part where p_brand <> (select min(p_brand) from part ) AND p_size IN (select (p_size) from part p where p.p_type = part.p_type ) AND p_size <> 340 PREHOOK: type: QUERY POSTHOOK: query: explain select * from part where p_brand <> (select min(p_brand) from part ) AND p_size IN (select (p_size) from part p where p.p_type = part.p_type ) AND p_size <> 340 @@ -1998,9 +1998,8 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) Reducer 5 <- Map 4 (GROUP, 1) - Reducer 7 <- Map 6 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -2009,7 +2008,7 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_size <> 340) (type: boolean) + predicate: ((p_size <> 340) and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) @@ -2043,18 +2042,22 @@ STAGE PLANS: alias: p Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_type is not null (type: boolean) + predicate: ((p_size <> 340) and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: p_type (type: string), p_size (type: int) - mode: hash + Select Operator + expressions: p_type (type: string), p_size (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Group By Operator + keys: _col0 (type: string), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -2078,10 +2081,10 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col4 (type: string), _col5 (type: int) - 1 _col1 (type: string), _col0 (type: int) + 1 _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 28 Data size: 8752 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2102,22 +2105,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 7 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col0 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col0 (type: int) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -2125,7 +2112,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[18][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product +Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product PREHOOK: query: select * from part where p_brand <> (select min(p_brand) from part ) AND p_size IN (select (p_size) from part p where p.p_type = part.p_type ) AND p_size <> 340 PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -2194,19 +2181,19 @@ STAGE PLANS: alias: p Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((p_partkey = p_partkey) and (p_size = p_size)) (type: boolean) - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + predicate: (p_partkey is not null and p_size is not null) (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(p_name) keys: p_partkey (type: int), p_size (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Reducer 2 Reduce Operator Tree: @@ -2239,16 +2226,16 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 363 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: bigint), true (type: boolean), _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 363 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: int), _col3 (type: int) sort order: ++ Map-reduce partition columns: _col2 (type: int), _col3 (type: int) - Statistics: Num rows: 3 Data size: 363 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: boolean) Stage: Stage-0 @@ -3611,23 +3598,23 @@ STAGE PLANS: alias: lineitem Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((l_linenumber = l_linenumber) and (l_shipmode = 'AIR')) (type: boolean) - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + predicate: ((l_shipmode = 'AIR') and l_linenumber is not null) (type: boolean) + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_orderkey (type: int), l_linenumber (type: int) outputColumnNames: l_orderkey, l_linenumber - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(l_orderkey) keys: l_linenumber (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Reducer 2 Reduce Operator Tree: @@ -3688,16 +3675,16 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1439 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: int), true (type: boolean), _col0 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12 Data size: 1439 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: int) sort order: + Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 12 Data size: 1439 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: boolean) Stage: Stage-0 @@ -3811,23 +3798,23 @@ STAGE PLANS: alias: lineitem Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((l_linenumber = l_linenumber) and (l_shipmode = 'AIR')) (type: boolean) - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + predicate: ((l_shipmode = 'AIR') and l_linenumber is not null) (type: boolean) + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_orderkey (type: int), l_linenumber (type: int) outputColumnNames: l_orderkey, l_linenumber - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(l_orderkey) keys: l_linenumber (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Reducer 2 Reduce Operator Tree: @@ -3888,16 +3875,16 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1439 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: int), true (type: boolean), _col0 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12 Data size: 1439 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: int) sort order: + Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 12 Data size: 1439 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: boolean) Stage: Stage-0 @@ -4117,23 +4104,26 @@ STAGE PLANS: TableScan alias: part_null Statistics: Num rows: 1 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Filter Operator + predicate: (p_name is not null and p_type is not null) (type: boolean) Statistics: Num rows: 1 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string), _col4 (type: string) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col4 (type: string) + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 3256 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Reduce Output Operator + key expressions: _col1 (type: string), _col4 (type: string) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col4 (type: string) + Statistics: Num rows: 1 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Map 3 Map Operator Tree: TableScan alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: p_type is not null (type: boolean) + predicate: (p_name is not null and p_type is not null) (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_name (type: string), p_brand (type: string), p_type (type: string) diff --git a/ql/src/test/results/clientpositive/spark/subquery_select.q.out b/ql/src/test/results/clientpositive/spark/subquery_select.q.out index 7d3a16b6ee..edb2b92f73 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_select.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_select.q.out @@ -1666,7 +1666,7 @@ POSTHOOK: Input: default@part 2 46 46 46 23 46 -Warning: Shuffle Join JOIN[13][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 4' is a cross product +Warning: Shuffle Join JOIN[16][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 4' is a cross product PREHOOK: query: explain select * from src b @@ -1693,10 +1693,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Reducer 7 (PARTITION-LEVEL SORT, 1) - Reducer 5 <- Reducer 4 (GROUP, 2) - Reducer 7 <- Map 6 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) + Reducer 6 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1704,32 +1703,35 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map 3 Map Operator Tree: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '9') and (value = value)) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + predicate: ((key > '9') and value is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: value (type: string) outputColumnNames: _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 6 + Map 5 Map Operator Tree: TableScan alias: src @@ -1751,7 +1753,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) @@ -1768,45 +1770,40 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Left Outer Join 0 to 1 + Inner Join 0 to 1 keys: 0 1 outputColumnNames: _col1, _col2 - Statistics: Num rows: 83 Data size: 16236 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col2 (type: string), _col1 (type: string) - mode: hash + Statistics: Num rows: 166 Data size: 32473 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 16236 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 83 Data size: 16236 Basic stats: COMPLETE Column stats: NONE - Reducer 5 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 41 Data size: 8020 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 41 Data size: 8020 Basic stats: COMPLETE Column stats: NONE - Reducer 7 + Statistics: Num rows: 166 Data size: 32473 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 32473 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 166 Data size: 32473 Basic stats: COMPLETE Column stats: NONE + Reducer 6 Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Filter Operator + predicate: _col0 is not null (type: boolean) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Stage: Stage-0 Fetch Operator @@ -1814,7 +1811,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[13][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 4' is a cross product +Warning: Shuffle Join JOIN[16][tables = [$hdt$_1, $hdt$_2]] in Work 'Reducer 4' is a cross product PREHOOK: query: select * from src b where b.key in @@ -1873,32 +1870,35 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map 3 Map Operator Tree: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '9') and (value = value)) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + predicate: ((key > '9') and value is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: value (type: string) outputColumnNames: _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Map 5 Map Operator Tree: TableScan @@ -1940,14 +1940,14 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Left Outer Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col1 (type: string) - 1 _col2 (type: string) - outputColumnNames: _col1, _col2, _col3 + 1 _col1 (type: string) + outputColumnNames: _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: CASE WHEN (_col3 is null) THEN (null) ELSE (_col2) END (type: string), _col1 (type: string) + expressions: _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -1968,16 +1968,19 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), true (type: boolean), _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 + Filter Operator + predicate: _col1 is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string) - sort order: + - Map-reduce partition columns: _col2 (type: string) + Select Operator + expressions: _col1 (type: string), _col0 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: boolean) + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/subquery_views.q.out b/ql/src/test/results/clientpositive/spark/subquery_views.q.out index 91e39913a7..9a1c25fffd 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_views.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_views.q.out @@ -180,19 +180,19 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key = key) and (value = value) and (value > 'val_11')) (type: boolean) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + predicate: ((value > 'val_11') and key is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(), count(key) keys: key (type: string), value (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint), _col3 (type: bigint) Map 15 Map Operator Tree: @@ -200,18 +200,18 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key = key) and (value = value) and (value > 'val_11')) (type: boolean) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + predicate: ((value > 'val_11') and key is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: key (type: string), value (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Map 18 Map Operator Tree: TableScan @@ -281,12 +281,12 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 20 Data size: 212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 20 Data size: 212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint), _col3 (type: bigint) Reducer 16 Reduce Operator Tree: @@ -294,16 +294,16 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), true (type: boolean) outputColumnNames: _col0, _col2, _col3 - Statistics: Num rows: 20 Data size: 212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 20 Data size: 212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: string), _col3 (type: boolean) Reducer 17 Reduce Operator Tree: @@ -382,12 +382,12 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 20 Data size: 212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 20 Data size: 212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint), _col3 (type: bigint) Reducer 7 Reduce Operator Tree: @@ -395,16 +395,16 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), true (type: boolean) outputColumnNames: _col0, _col2, _col3 - Statistics: Num rows: 20 Data size: 212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 20 Data size: 212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: string), _col3 (type: boolean) Reducer 8 Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out b/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out index 81af937e97..ff1af2c483 100644 --- a/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out @@ -307,8 +307,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 3:int, val 1), SelectColumnIsNotNull(col 1:int)) - predicate: ((l_linenumber = 1) and l_partkey is not null) (type: boolean) + predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 3:int, val 1), SelectColumnIsNotNull(col 1:int), SelectColumnIsNotNull(col 0:int)) + predicate: ((l_linenumber = 1) and l_orderkey is not null and l_partkey is not null) (type: boolean) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int), 1 (type: int) @@ -349,22 +349,23 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: FilterStringGroupColEqualStringScalar(col 14:string, val AIR), FilterLongColEqualLongColumn(col 3:int, col 3:int)) - predicate: ((l_linenumber = l_linenumber) and (l_shipmode = 'AIR')) (type: boolean) + predicateExpression: FilterExprAndExpr(children: FilterStringGroupColEqualStringScalar(col 14:string, val AIR), FilterLongColEqualLongScalar(col 3:int, val 1), SelectColumnIsNotNull(col 0:int)) + predicate: ((l_linenumber = 1) and (l_shipmode = 'AIR') and l_orderkey is not null) (type: boolean) Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: l_orderkey (type: int), l_linenumber (type: int) + expressions: l_orderkey (type: int), 1 (type: int) outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 3] + projectedOutputColumnNums: [0, 17] + selectExpressions: ConstantVectorExpression(val 1) -> 17:int Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int, col 3:int + keyExpressions: col 0:int, col 17:int native: false vectorProcessingMode: HASH projectedOutputColumnNums: [] diff --git a/ql/src/test/results/clientpositive/subquery_exists.q.out b/ql/src/test/results/clientpositive/subquery_exists.q.out index c9f2a79041..b6b31aaf47 100644 --- a/ql/src/test/results/clientpositive/subquery_exists.q.out +++ b/ql/src/test/results/clientpositive/subquery_exists.q.out @@ -27,35 +27,38 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((value > 'val_9') and key is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key = key) and (value = value) and (value > 'val_9')) (type: boolean) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + predicate: ((value > 'val_9') and key is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -64,10 +67,10 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -217,16 +220,19 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -1036,13 +1042,13 @@ POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@tx1 PREHOOK: query: insert into tx1 values (1, 1), - (1, 2), - (1, 3) + (1, 2), + (1, 3) PREHOOK: type: QUERY PREHOOK: Output: default@tx1 POSTHOOK: query: insert into tx1 values (1, 1), - (1, 2), - (1, 3) + (1, 2), + (1, 3) POSTHOOK: type: QUERY POSTHOOK: Output: default@tx1 POSTHOOK: Lineage: tx1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] @@ -1065,137 +1071,74 @@ POSTHOOK: query: explain select count(*) as result,3 as expected from tx1 u where exists (select * from tx1 v where u.a=v.a and u.b <> v.b) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-3 depends on stages: Stage-4 - Stage-1 depends on stages: Stage-3 + Stage-1 is a root stage Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-4 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan alias: u Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: a (type: int), b (type: int) - outputColumnNames: a, b + Filter Operator + predicate: a is not null (type: boolean) Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: a (type: int), b (type: int) - mode: hash + Select Operator + expressions: a (type: int), b (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int), KEY._col1 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: + value expressions: _col1 (type: int) TableScan alias: v Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: a (type: int), b (type: int) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: (a is not null and b is not null) (type: boolean) Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Select Operator + expressions: a (type: int), b (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) - TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) + Group By Operator + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Semi Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3 + outputColumnNames: _col1, _col3 + residual filter predicates: {(_col1 <> _col3)} Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col3 <> _col1) (type: boolean) - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: u + Select Operator Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: a (type: int), b (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE - TableScan - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col0 (type: int), _col1 (type: int) - 1 _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -1269,135 +1212,76 @@ POSTHOOK: type: QUERY POSTHOOK: Output: default@t2 POSTHOOK: Lineage: t2.i EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] POSTHOOK: Lineage: t2.j EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -Warning: Shuffle Join JOIN[12][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: explain select * from t1 where t1.i in (select t2.i from t2 where t2.j <> t1.j) PREHOOK: type: QUERY POSTHOOK: query: explain select * from t1 where t1.i in (select t2.i from t2 where t2.j <> t1.j) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-2 + Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan alias: t1 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: j (type: int) - outputColumnNames: j + Filter Operator + predicate: i is not null (type: boolean) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: j (type: int) - mode: hash - outputColumnNames: _col0 + Select Operator + expressions: i (type: int), j (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: + value expressions: _col1 (type: int) TableScan alias: t2 Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i (type: int), j (type: int) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: (i is not null and j is not null) (type: boolean) Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int) - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col1 <> _col2) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col2 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: int) - mode: hash + Select Operator + expressions: i (type: int), j (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: i (type: int), j (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - TableScan - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Reduce Operator Tree: Join Operator condition map: Left Semi Join 0 to 1 keys: - 0 _col0 (type: int), _col1 (type: int) - 1 _col0 (type: int), _col1 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + residual filter predicates: {(_col1 <> _col3)} + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1405,7 +1289,6 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[12][tables = [$hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: select * from t1 where t1.i in (select t2.i from t2 where t2.j <> t1.j) PREHOOK: type: QUERY PREHOOK: Input: default@t1 diff --git a/ql/src/test/results/clientpositive/subquery_exists_having.q.out b/ql/src/test/results/clientpositive/subquery_exists_having.q.out index 2c41ff6c33..ef06dfe697 100644 --- a/ql/src/test/results/clientpositive/subquery_exists_having.q.out +++ b/ql/src/test/results/clientpositive/subquery_exists_having.q.out @@ -30,9 +30,8 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -74,22 +73,22 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key = key) and (value > 'val_9')) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + predicate: ((value > 'val_9') and key is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -172,9 +171,8 @@ STAGE PLANS: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -192,33 +190,33 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key = key) and (value > 'val_9')) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + predicate: ((value > 'val_9') and key is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Demux Operator - Statistics: Num rows: 583 Data size: 6193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 291 Data size: 3091 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE Mux Operator - Statistics: Num rows: 874 Data size: 9284 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 999 Data size: 10612 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Left Semi Join 0 to 1 @@ -235,7 +233,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Mux Operator - Statistics: Num rows: 874 Data size: 9284 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 999 Data size: 10612 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Left Semi Join 0 to 1 diff --git a/ql/src/test/results/clientpositive/subquery_in_having.q.out b/ql/src/test/results/clientpositive/subquery_in_having.q.out deleted file mode 100644 index 6893442b61..0000000000 --- a/ql/src/test/results/clientpositive/subquery_in_having.q.out +++ /dev/null @@ -1,2770 +0,0 @@ -PREHOOK: query: DROP TABLE IF EXISTS part_subq -PREHOOK: type: DROPTABLE -POSTHOOK: query: DROP TABLE IF EXISTS part_subq -POSTHOOK: type: DROPTABLE -PREHOOK: query: CREATE TABLE part_subq( - p_partkey INT, - p_name STRING, - p_mfgr STRING, - p_brand STRING, - p_type STRING, - p_size INT, - p_container STRING, - p_retailprice DOUBLE, - p_comment STRING -) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@part_subq -POSTHOOK: query: CREATE TABLE part_subq( - p_partkey INT, - p_name STRING, - p_mfgr STRING, - p_brand STRING, - p_type STRING, - p_size INT, - p_container STRING, - p_retailprice DOUBLE, - p_comment STRING -) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@part_subq -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part_subq -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@part_subq -POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part_subq -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@part_subq -PREHOOK: query: explain - select key, count(*) -from src -group by key -having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key ) -PREHOOK: type: QUERY -POSTHOOK: query: explain - select key, count(*) -from src -group by key -having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key ) -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-3 - Stage-3 is a root stage - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col1 is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col1 (type: bigint) - sort order: + - Map-reduce partition columns: _col1 (type: bigint) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - TableScan - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col1 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - alias: s1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > '9') (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: bigint) - outputColumnNames: _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col1 is not null (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: bigint) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select s1.key, count(*) from src s1 where s1.key > '9' group by s1.key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: select s1.key, count(*) from src s1 where s1.key > '9' group by s1.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -90 3 -92 1 -95 2 -96 1 -97 2 -98 2 -PREHOOK: query: select key, count(*) -from src -group by key -having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: select key, count(*) -from src -group by key -having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -0 3 -119 3 -128 3 -167 3 -187 3 -193 3 -199 3 -208 3 -273 3 -298 3 -311 3 -316 3 -318 3 -327 3 -35 3 -369 3 -384 3 -396 3 -403 3 -409 3 -417 3 -430 3 -431 3 -438 3 -454 3 -466 3 -480 3 -498 3 -5 3 -70 3 -90 3 -PREHOOK: query: explain -select p_mfgr, avg(p_size) -from part_subq b -group by b.p_mfgr -having b.p_mfgr in - (select p_mfgr - from part_subq - group by p_mfgr - having max(p_size) - min(p_size) < 20 - ) -PREHOOK: type: QUERY -POSTHOOK: query: explain -select p_mfgr, avg(p_size) -from part_subq b -group by b.p_mfgr -having b.p_mfgr in - (select p_mfgr - from part_subq - group by p_mfgr - having max(p_size) - min(p_size) < 20 - ) -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-3 - Stage-3 is a root stage - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 1 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_mfgr is not null (type: boolean) - Statistics: Num rows: 1 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: avg(p_size) - keys: p_mfgr (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct) - Reduce Operator Tree: - Group By Operator - aggregations: avg(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3490 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3490 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - alias: part_subq - Statistics: Num rows: 1 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_mfgr is not null (type: boolean) - Statistics: Num rows: 1 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(p_size), min(p_size) - keys: p_mfgr (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int) - Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0), min(VALUE._col1) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col1 - _col2) < 20) (type: boolean) - Statistics: Num rows: 1 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain -select p_mfgr, avg(p_size) -from part_subq b -group by b.p_mfgr -having b.p_mfgr in - (select p_mfgr - from part_subq - group by p_mfgr - having max(p_size) - min(p_size) < 20 - ) -PREHOOK: type: QUERY -POSTHOOK: query: explain -select p_mfgr, avg(p_size) -from part_subq b -group by b.p_mfgr -having b.p_mfgr in - (select p_mfgr - from part_subq - group by p_mfgr - having max(p_size) - min(p_size) < 20 - ) -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 1 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_mfgr is not null (type: boolean) - Statistics: Num rows: 1 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: avg(p_size) - keys: p_mfgr (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct) - TableScan - alias: part_subq - Statistics: Num rows: 1 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_mfgr is not null (type: boolean) - Statistics: Num rows: 1 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(p_size), min(p_size) - keys: p_mfgr (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int) - Reduce Operator Tree: - Demux Operator - Statistics: Num rows: 2 Data size: 6346 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: avg(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 2 Data size: 6346 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Group By Operator - aggregations: max(VALUE._col0), min(VALUE._col1) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col1 - _col2) < 20) (type: boolean) - Statistics: Num rows: 1 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 2 Data size: 6346 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select b.key, min(b.value) -from src b -group by b.key -having b.key in ( select a.key - from src a - where a.value > 'val_9' and a.value = min(b.value) - ) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: select b.key, min(b.value) -from src b -group by b.key -having b.key in ( select a.key - from src a - where a.value > 'val_9' and a.value = min(b.value) - ) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -90 val_90 -92 val_92 -95 val_95 -96 val_96 -97 val_97 -98 val_98 -PREHOOK: query: explain -select key, value, count(*) -from src b -where b.key in (select key from src where src.key > '8') -group by key, value -having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key ) -PREHOOK: type: QUERY -POSTHOOK: query: explain -select key, value, count(*) -from src b -where b.key in (select key from src where src.key > '8') -group by key, value -having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key ) -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-2, Stage-5 - Stage-5 is a root stage - Stage-0 depends on stages: Stage-3 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > '8') (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > '8') (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col2 is not null (type: boolean) - Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col2 (type: bigint) - sort order: + - Map-reduce partition columns: _col2 (type: bigint) - Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) - TableScan - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col2 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - alias: s1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > '9') (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: bigint) - outputColumnNames: _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col1 is not null (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: bigint) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select key, value, count(*) -from src b -where b.key in (select key from src where src.key > '8') -group by key, value -having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key ) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: select key, value, count(*) -from src b -where b.key in (select key from src where src.key > '8') -group by key, value -having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key ) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -80 val_80 1 -82 val_82 1 -83 val_83 2 -84 val_84 2 -85 val_85 1 -86 val_86 1 -87 val_87 1 -9 val_9 1 -90 val_90 3 -92 val_92 1 -95 val_95 2 -96 val_96 1 -97 val_97 2 -98 val_98 2 -PREHOOK: query: explain -select key, value, count(*) -from src b -where b.key in (select key from src where src.key > '8') -group by key, value -having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key ) -PREHOOK: type: QUERY -POSTHOOK: query: explain -select key, value, count(*) -from src b -where b.key in (select key from src where src.key > '8') -group by key, value -having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key ) -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-7 depends on stages: Stage-2, Stage-5 , consists of Stage-9, Stage-3 - Stage-9 has a backup stage: Stage-3 - Stage-6 depends on stages: Stage-9 - Stage-3 - Stage-10 is a root stage - Stage-2 depends on stages: Stage-10 - Stage-0 depends on stages: Stage-6, Stage-3 - -STAGE PLANS: - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - alias: s1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > '9') (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: bigint) - outputColumnNames: _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col1 is not null (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: bigint) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-7 - Conditional Operator - - Stage: Stage-9 - Map Reduce Local Work - Alias -> Map Local Tables: - $INTNAME1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $INTNAME1 - TableScan - HashTable Sink Operator - keys: - 0 _col2 (type: bigint) - 1 _col0 (type: bigint) - - Stage: Stage-6 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col2 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col2 (type: bigint) - sort order: + - Map-reduce partition columns: _col2 (type: bigint) - Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) - TableScan - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col2 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-10 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_0:$hdt$_1:src - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_1:src - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > '8') (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > '8') (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col2 is not null (type: boolean) - Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: explain -select key, value, count(*) -from src b -where b.key in (select key from src where src.value = b.value) -group by key, value -having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key ) -PREHOOK: type: QUERY -POSTHOOK: query: explain -select key, value, count(*) -from src b -where b.key in (select key from src where src.value = b.value) -group by key, value -having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key ) -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-7 depends on stages: Stage-2, Stage-5 , consists of Stage-9, Stage-3 - Stage-9 has a backup stage: Stage-3 - Stage-6 depends on stages: Stage-9 - Stage-3 - Stage-10 is a root stage - Stage-2 depends on stages: Stage-10 - Stage-0 depends on stages: Stage-6, Stage-3 - -STAGE PLANS: - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - alias: s1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > '9') (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: bigint) - outputColumnNames: _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col1 is not null (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: bigint) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-7 - Conditional Operator - - Stage: Stage-9 - Map Reduce Local Work - Alias -> Map Local Tables: - $INTNAME1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $INTNAME1 - TableScan - HashTable Sink Operator - keys: - 0 _col2 (type: bigint) - 1 _col0 (type: bigint) - - Stage: Stage-6 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col2 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col2 (type: bigint) - sort order: + - Map-reduce partition columns: _col2 (type: bigint) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) - TableScan - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col2 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-10 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_0:$hdt$_1:src - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_1:src - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: string), _col1 (type: string) - 1 _col0 (type: string), _col1 (type: string) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col0 (type: string), _col1 (type: string) - 1 _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col2 is not null (type: boolean) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select key, value, count(*) -from src b -where b.key in (select key from src where src.value = b.value) -group by key, value -having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key ) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -#### A masked pattern was here #### -POSTHOOK: query: select key, value, count(*) -from src b -where b.key in (select key from src where src.value = b.value) -group by key, value -having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key ) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -#### A masked pattern was here #### -0 val_0 3 -10 val_10 1 -100 val_100 2 -103 val_103 2 -104 val_104 2 -105 val_105 1 -11 val_11 1 -111 val_111 1 -113 val_113 2 -114 val_114 1 -116 val_116 1 -118 val_118 2 -119 val_119 3 -12 val_12 2 -120 val_120 2 -125 val_125 2 -126 val_126 1 -128 val_128 3 -129 val_129 2 -131 val_131 1 -133 val_133 1 -134 val_134 2 -136 val_136 1 -137 val_137 2 -143 val_143 1 -145 val_145 1 -146 val_146 2 -149 val_149 2 -15 val_15 2 -150 val_150 1 -152 val_152 2 -153 val_153 1 -155 val_155 1 -156 val_156 1 -157 val_157 1 -158 val_158 1 -160 val_160 1 -162 val_162 1 -163 val_163 1 -164 val_164 2 -165 val_165 2 -166 val_166 1 -167 val_167 3 -168 val_168 1 -17 val_17 1 -170 val_170 1 -172 val_172 2 -174 val_174 2 -175 val_175 2 -176 val_176 2 -177 val_177 1 -178 val_178 1 -179 val_179 2 -18 val_18 2 -180 val_180 1 -181 val_181 1 -183 val_183 1 -186 val_186 1 -187 val_187 3 -189 val_189 1 -19 val_19 1 -190 val_190 1 -191 val_191 2 -192 val_192 1 -193 val_193 3 -194 val_194 1 -195 val_195 2 -196 val_196 1 -197 val_197 2 -199 val_199 3 -2 val_2 1 -20 val_20 1 -200 val_200 2 -201 val_201 1 -202 val_202 1 -203 val_203 2 -205 val_205 2 -207 val_207 2 -208 val_208 3 -209 val_209 2 -213 val_213 2 -214 val_214 1 -216 val_216 2 -217 val_217 2 -218 val_218 1 -219 val_219 2 -221 val_221 2 -222 val_222 1 -223 val_223 2 -224 val_224 2 -226 val_226 1 -228 val_228 1 -229 val_229 2 -233 val_233 2 -235 val_235 1 -237 val_237 2 -238 val_238 2 -239 val_239 2 -24 val_24 2 -241 val_241 1 -242 val_242 2 -244 val_244 1 -247 val_247 1 -248 val_248 1 -249 val_249 1 -252 val_252 1 -255 val_255 2 -256 val_256 2 -257 val_257 1 -258 val_258 1 -26 val_26 2 -260 val_260 1 -262 val_262 1 -263 val_263 1 -265 val_265 2 -266 val_266 1 -27 val_27 1 -272 val_272 2 -273 val_273 3 -274 val_274 1 -275 val_275 1 -278 val_278 2 -28 val_28 1 -280 val_280 2 -281 val_281 2 -282 val_282 2 -283 val_283 1 -284 val_284 1 -285 val_285 1 -286 val_286 1 -287 val_287 1 -288 val_288 2 -289 val_289 1 -291 val_291 1 -292 val_292 1 -296 val_296 1 -298 val_298 3 -30 val_30 1 -302 val_302 1 -305 val_305 1 -306 val_306 1 -307 val_307 2 -308 val_308 1 -309 val_309 2 -310 val_310 1 -311 val_311 3 -315 val_315 1 -316 val_316 3 -317 val_317 2 -318 val_318 3 -321 val_321 2 -322 val_322 2 -323 val_323 1 -325 val_325 2 -327 val_327 3 -33 val_33 1 -331 val_331 2 -332 val_332 1 -333 val_333 2 -335 val_335 1 -336 val_336 1 -338 val_338 1 -339 val_339 1 -34 val_34 1 -341 val_341 1 -342 val_342 2 -344 val_344 2 -345 val_345 1 -35 val_35 3 -351 val_351 1 -353 val_353 2 -356 val_356 1 -360 val_360 1 -362 val_362 1 -364 val_364 1 -365 val_365 1 -366 val_366 1 -367 val_367 2 -368 val_368 1 -369 val_369 3 -37 val_37 2 -373 val_373 1 -374 val_374 1 -375 val_375 1 -377 val_377 1 -378 val_378 1 -379 val_379 1 -382 val_382 2 -384 val_384 3 -386 val_386 1 -389 val_389 1 -392 val_392 1 -393 val_393 1 -394 val_394 1 -395 val_395 2 -396 val_396 3 -397 val_397 2 -399 val_399 2 -4 val_4 1 -400 val_400 1 -402 val_402 1 -403 val_403 3 -404 val_404 2 -407 val_407 1 -409 val_409 3 -41 val_41 1 -411 val_411 1 -413 val_413 2 -414 val_414 2 -417 val_417 3 -418 val_418 1 -419 val_419 1 -42 val_42 2 -421 val_421 1 -424 val_424 2 -427 val_427 1 -429 val_429 2 -43 val_43 1 -430 val_430 3 -431 val_431 3 -432 val_432 1 -435 val_435 1 -436 val_436 1 -437 val_437 1 -438 val_438 3 -439 val_439 2 -44 val_44 1 -443 val_443 1 -444 val_444 1 -446 val_446 1 -448 val_448 1 -449 val_449 1 -452 val_452 1 -453 val_453 1 -454 val_454 3 -455 val_455 1 -457 val_457 1 -458 val_458 2 -459 val_459 2 -460 val_460 1 -462 val_462 2 -463 val_463 2 -466 val_466 3 -467 val_467 1 -47 val_47 1 -470 val_470 1 -472 val_472 1 -475 val_475 1 -477 val_477 1 -478 val_478 2 -479 val_479 1 -480 val_480 3 -481 val_481 1 -482 val_482 1 -483 val_483 1 -484 val_484 1 -485 val_485 1 -487 val_487 1 -490 val_490 1 -491 val_491 1 -492 val_492 2 -493 val_493 1 -494 val_494 1 -495 val_495 1 -496 val_496 1 -497 val_497 1 -498 val_498 3 -5 val_5 3 -51 val_51 2 -53 val_53 1 -54 val_54 1 -57 val_57 1 -58 val_58 2 -64 val_64 1 -65 val_65 1 -66 val_66 1 -67 val_67 2 -69 val_69 1 -70 val_70 3 -72 val_72 2 -74 val_74 1 -76 val_76 2 -77 val_77 1 -78 val_78 1 -8 val_8 1 -80 val_80 1 -82 val_82 1 -83 val_83 2 -84 val_84 2 -85 val_85 1 -86 val_86 1 -87 val_87 1 -9 val_9 1 -90 val_90 3 -92 val_92 1 -95 val_95 2 -96 val_96 1 -97 val_97 2 -98 val_98 2 -PREHOOK: query: explain -select p_mfgr, p_name, avg(p_size) -from part_subq -group by p_mfgr, p_name -having p_name in - (select first_value(p_name) over(partition by p_mfgr order by p_size) from part_subq) -PREHOOK: type: QUERY -POSTHOOK: query: explain -select p_mfgr, p_name, avg(p_size) -from part_subq -group by p_mfgr, p_name -having p_name in - (select first_value(p_name) over(partition by p_mfgr order by p_size) from part_subq) -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-5 depends on stages: Stage-1, Stage-3 , consists of Stage-6, Stage-2 - Stage-6 has a backup stage: Stage-2 - Stage-4 depends on stages: Stage-6 - Stage-2 - Stage-3 is a root stage - Stage-0 depends on stages: Stage-4, Stage-2 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: part_subq - Statistics: Num rows: 1 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_name is not null (type: boolean) - Statistics: Num rows: 1 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: avg(p_size) - keys: p_name (type: string), p_mfgr (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: struct) - Reduce Operator Tree: - Group By Operator - aggregations: avg(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col0 (type: string), _col2 (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-5 - Conditional Operator - - Stage: Stage-6 - Map Reduce Local Work - Alias -> Map Local Tables: - $INTNAME1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $INTNAME1 - TableScan - HashTable Sink Operator - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3490 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3490 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 1 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col2 (type: double) - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3490 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3490 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - alias: part_subq - Statistics: Num rows: 1 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: p_mfgr (type: string), p_size (type: int) - sort order: ++ - Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 1 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - value expressions: p_name (type: string) - Reduce Operator Tree: - Select Operator - expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 1 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Function definitions: - Input definition - input alias: ptf_0 - output shape: _col1: string, _col2: string, _col5: int - type: WINDOWING - Windowing table definition - input alias: ptf_1 - name: windowingtablefunction - order by: _col5 ASC NULLS FIRST - partition by: _col2 - raw input shape: - window functions: - window function definition - alias: first_value_window_0 - arguments: _col1 - name: first_value - window function: GenericUDAFFirstValueEvaluator - window frame: RANGE PRECEDING(MAX)~CURRENT - Statistics: Num rows: 1 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: first_value_window_0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: first_value_window_0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: CREATE TABLE src_null (key STRING COMMENT 'default', value STRING COMMENT 'default') STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@src_null -POSTHOOK: query: CREATE TABLE src_null (key STRING COMMENT 'default', value STRING COMMENT 'default') STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@src_null -PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/kv1.txt" INTO TABLE src_null -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@src_null -POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/kv1.txt" INTO TABLE src_null -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@src_null -PREHOOK: query: INSERT INTO src_null values('5444', null) -PREHOOK: type: QUERY -PREHOOK: Output: default@src_null -POSTHOOK: query: INSERT INTO src_null values('5444', null) -POSTHOOK: type: QUERY -POSTHOOK: Output: default@src_null -POSTHOOK: Lineage: src_null.key SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: src_null.value SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -Warning: Map Join MAPJOIN[185][bigTable=?] in task 'Stage-1:MAPRED' is a cross product -Warning: Map Join MAPJOIN[186][bigTable=?] in task 'Stage-8:MAPRED' is a cross product -Warning: Map Join MAPJOIN[187][bigTable=?] in task 'Stage-9:MAPRED' is a cross product -Warning: Map Join MAPJOIN[188][bigTable=?] in task 'Stage-14:MAPRED' is a cross product -Warning: Map Join MAPJOIN[189][bigTable=?] in task 'Stage-15:MAPRED' is a cross product -PREHOOK: query: explain -select key, value, count(*) -from src_null b -where NOT EXISTS (select key from src_null where src_null.value <> b.value) -group by key, value -having count(*) not in (select count(*) from src_null s1 where s1.key > '9' and s1.value <> b.value group by s1.key ) -PREHOOK: type: QUERY -POSTHOOK: query: explain -select key, value, count(*) -from src_null b -where NOT EXISTS (select key from src_null where src_null.value <> b.value) -group by key, value -having count(*) not in (select count(*) from src_null s1 where s1.key > '9' and s1.value <> b.value group by s1.key ) -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-27 depends on stages: Stage-6 - Stage-1 depends on stages: Stage-27 - Stage-2 depends on stages: Stage-1 - Stage-20 depends on stages: Stage-2, Stage-8 , consists of Stage-26, Stage-3 - Stage-26 has a backup stage: Stage-3 - Stage-19 depends on stages: Stage-26 - Stage-4 depends on stages: Stage-3, Stage-14, Stage-19 - Stage-3 - Stage-12 is a root stage - Stage-29 depends on stages: Stage-12 - Stage-9 depends on stages: Stage-29 - Stage-10 depends on stages: Stage-9 - Stage-28 depends on stages: Stage-10 - Stage-8 depends on stages: Stage-28 - Stage-18 is a root stage - Stage-31 depends on stages: Stage-18 - Stage-15 depends on stages: Stage-31 - Stage-16 depends on stages: Stage-15 - Stage-30 depends on stages: Stage-16 - Stage-14 depends on stages: Stage-30 - Stage-0 depends on stages: Stage-4 - -STAGE PLANS: - Stage: Stage-6 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 1 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: value - Statistics: Num rows: 1 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: value (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-27 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_0:$hdt$_1:$hdt$_1:src_null - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_1:$hdt$_1:src_null - TableScan - alias: src_null - Statistics: Num rows: 1 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 - 1 - - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 1 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 1 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 11641 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 <> _col1) (type: boolean) - Statistics: Num rows: 1 Data size: 11641 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 1 Data size: 11641 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 11641 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 11641 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Demux Operator - Statistics: Num rows: 2 Data size: 17461 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 3 Data size: 26191 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Filter Operator - predicate: _col3 is null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col1 (type: string), _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8730 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), true (type: boolean) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8730 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 3 Data size: 26191 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Filter Operator - predicate: _col3 is null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col1 (type: string), _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col2 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col0 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-20 - Conditional Operator - - Stage: Stage-26 - Map Reduce Local Work - Alias -> Map Local Tables: - $INTNAME1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $INTNAME1 - TableScan - HashTable Sink Operator - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - - Stage: Stage-19 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col4, _col5 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Local Work: - Map Reduce Local Work - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col1 (type: string), _col2 (type: bigint) - sort order: ++ - Map-reduce partition columns: _col1 (type: string), _col2 (type: bigint) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: string), _col4 (type: bigint), _col5 (type: bigint) - TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: bigint) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 1 Data size: 5821 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Demux Operator - Statistics: Num rows: 2 Data size: 5821 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 3 Data size: 8731 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col1 (type: string), _col2 (type: bigint) - 1 _col1 (type: string), _col0 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col8 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Filter Operator - predicate: (not CASE WHEN ((_col4 = 0)) THEN (false) WHEN (_col4 is null) THEN (false) WHEN (_col8 is not null) THEN (true) WHEN (_col2 is null) THEN (null) WHEN ((_col5 < _col4)) THEN (true) ELSE (false) END) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: bigint) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 2910 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: bigint), _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 2910 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 2910 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: bigint), _col1 (type: string), true (type: boolean) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 2910 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 3 Data size: 8731 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col1 (type: string), _col2 (type: bigint) - 1 _col1 (type: string), _col0 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col8 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Filter Operator - predicate: (not CASE WHEN ((_col4 = 0)) THEN (false) WHEN (_col4 is null) THEN (false) WHEN (_col8 is not null) THEN (true) WHEN (_col2 is null) THEN (null) WHEN ((_col5 < _col4)) THEN (true) ELSE (false) END) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: string), _col2 (type: bigint) - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 5821 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: bigint) - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col4, _col5 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-12 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 1 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: value - Statistics: Num rows: 1 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: value (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-29 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:$hdt$_1:$hdt$_2:$hdt$_2:$hdt$_3:$hdt$_3:src_null - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:$hdt$_1:$hdt$_2:$hdt$_2:$hdt$_3:$hdt$_3:src_null - TableScan - alias: src_null - Statistics: Num rows: 1 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 - 1 - - Stage: Stage-9 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 1 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 1 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 11641 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 <> _col1) (type: boolean) - Statistics: Num rows: 1 Data size: 11641 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 1 Data size: 11641 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 11641 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 11641 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Demux Operator - Statistics: Num rows: 2 Data size: 17461 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 3 Data size: 26191 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Filter Operator - predicate: _col3 is null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - keys: _col1 (type: string), _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8730 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), true (type: boolean) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8730 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 3 Data size: 26191 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Filter Operator - predicate: _col3 is null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - keys: _col1 (type: string), _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-10 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Group By Operator - keys: _col1 (type: string) - mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-28 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:$hdt$_1:$hdt$_1:s1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:$hdt$_1:$hdt$_1:s1 - TableScan - alias: s1 - Statistics: Num rows: 1 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > '9') (type: boolean) - Statistics: Num rows: 1 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 - 1 - - Stage: Stage-8 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 5821 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col1 <> _col2) (type: boolean) - Statistics: Num rows: 1 Data size: 5821 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 1 Data size: 5821 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col2 (type: string), _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 5821 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 5821 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 5821 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: bigint) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 1 Data size: 5821 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), count(_col2) - keys: _col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 5821 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-18 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 1 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: value - Statistics: Num rows: 1 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: value (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-31 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_2:$hdt$_2:$hdt$_2:$hdt$_2:$hdt$_3:$hdt$_3:$hdt$_4:$hdt$_4:src_null - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_2:$hdt$_2:$hdt$_2:$hdt$_2:$hdt$_3:$hdt$_3:$hdt$_4:$hdt$_4:src_null - TableScan - alias: src_null - Statistics: Num rows: 1 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 - 1 - - Stage: Stage-15 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 1 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 1 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 11641 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 <> _col1) (type: boolean) - Statistics: Num rows: 1 Data size: 11641 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 1 Data size: 11641 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 11641 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 11641 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Demux Operator - Statistics: Num rows: 2 Data size: 17461 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 3 Data size: 26191 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Filter Operator - predicate: _col3 is null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - keys: _col1 (type: string), _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8730 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), true (type: boolean) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8730 Basic stats: COMPLETE Column stats: NONE - Mux Operator - Statistics: Num rows: 3 Data size: 26191 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Filter Operator - predicate: _col3 is null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - keys: _col1 (type: string), _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-16 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Group By Operator - keys: _col1 (type: string) - mode: complete - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-30 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_2:$hdt$_2:$hdt$_2:$hdt$_2:$hdt$_2:s1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_2:$hdt$_2:$hdt$_2:$hdt$_2:$hdt$_2:s1 - TableScan - alias: s1 - Statistics: Num rows: 1 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > '9') (type: boolean) - Statistics: Num rows: 1 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 5820 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 - 1 - - Stage: Stage-14 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 5821 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col1 <> _col2) (type: boolean) - Statistics: Num rows: 1 Data size: 5821 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 1 Data size: 5821 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col2 (type: string), _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 5821 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 5821 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 5821 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: bigint) - outputColumnNames: _col1, _col2 - Statistics: Num rows: 1 Data size: 5821 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col1 (type: string), _col2 (type: bigint) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 5821 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -Warning: Map Join MAPJOIN[185][bigTable=?] in task 'Stage-1:MAPRED' is a cross product -Warning: Map Join MAPJOIN[186][bigTable=?] in task 'Stage-8:MAPRED' is a cross product -Warning: Map Join MAPJOIN[187][bigTable=?] in task 'Stage-9:MAPRED' is a cross product -Warning: Map Join MAPJOIN[188][bigTable=?] in task 'Stage-14:MAPRED' is a cross product -Warning: Map Join MAPJOIN[189][bigTable=?] in task 'Stage-15:MAPRED' is a cross product -PREHOOK: query: select key, value, count(*) -from src_null b -where NOT EXISTS (select key from src_null where src_null.value <> b.value) -group by key, value -having count(*) not in (select count(*) from src_null s1 where s1.key > '9' and s1.value <> b.value group by s1.key ) -PREHOOK: type: QUERY -PREHOOK: Input: default@src_null -#### A masked pattern was here #### -POSTHOOK: query: select key, value, count(*) -from src_null b -where NOT EXISTS (select key from src_null where src_null.value <> b.value) -group by key, value -having count(*) not in (select count(*) from src_null s1 where s1.key > '9' and s1.value <> b.value group by s1.key ) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src_null -#### A masked pattern was here #### -5444 NULL 1 -PREHOOK: query: DROP TABLE src_null -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@src_null -PREHOOK: Output: default@src_null -POSTHOOK: query: DROP TABLE src_null -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@src_null -POSTHOOK: Output: default@src_null -PREHOOK: query: DROP TABLE part_subq -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@part_subq -PREHOOK: Output: default@part_subq -POSTHOOK: query: DROP TABLE part_subq -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@part_subq -POSTHOOK: Output: default@part_subq diff --git a/ql/src/test/results/clientpositive/subquery_notexists.q.out b/ql/src/test/results/clientpositive/subquery_notexists.q.out index 329573e8e1..a6175f8fec 100644 --- a/ql/src/test/results/clientpositive/subquery_notexists.q.out +++ b/ql/src/test/results/clientpositive/subquery_notexists.q.out @@ -29,28 +29,28 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key = key) and (value = value) and (value > 'val_2')) (type: boolean) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + predicate: ((value > 'val_2') and key is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: key (type: string), value (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), true (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 20 Data size: 212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -78,7 +78,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 20 Data size: 212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: boolean) Reduce Operator Tree: Join Operator @@ -280,37 +280,37 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((value = value) and (value > 'val_2')) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + predicate: (value > 'val_2') (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: value (type: string), key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string) outputColumnNames: _col1 - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col1 (type: string) mode: complete outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), true (type: boolean) outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -339,7 +339,7 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 20 Data size: 212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean) Reduce Operator Tree: Join Operator diff --git a/ql/src/test/results/clientpositive/subquery_notexists_having.q.out b/ql/src/test/results/clientpositive/subquery_notexists_having.q.out index 4d2b2fc873..0d90e1b548 100644 --- a/ql/src/test/results/clientpositive/subquery_notexists_having.q.out +++ b/ql/src/test/results/clientpositive/subquery_notexists_having.q.out @@ -72,7 +72,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 20 Data size: 212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: boolean) Reduce Operator Tree: Join Operator @@ -105,28 +105,28 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key = key) and (value = value) and (value > 'val_12')) (type: boolean) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + predicate: ((value > 'val_12') and key is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: key (type: string), value (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), true (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 20 Data size: 212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -255,7 +255,7 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 20 Data size: 212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean) Reduce Operator Tree: Join Operator @@ -288,37 +288,37 @@ STAGE PLANS: alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((value = value) and (value > 'val_12')) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + predicate: (value > 'val_12') (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: value (type: string), key (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string) outputColumnNames: _col1 - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col1 (type: string) mode: complete outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), true (type: boolean) outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 212 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: diff --git a/ql/src/test/results/clientpositive/subquery_notin_having.q.out b/ql/src/test/results/clientpositive/subquery_notin_having.q.out index c321fe69ed..433609d016 100644 --- a/ql/src/test/results/clientpositive/subquery_notin_having.q.out +++ b/ql/src/test/results/clientpositive/subquery_notin_having.q.out @@ -286,7 +286,7 @@ STAGE PLANS: key expressions: _col0 (type: double) sort order: + Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 1 Data size: 121 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reduce Operator Tree: Join Operator @@ -319,7 +319,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: double) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: double) - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: boolean) Reduce Operator Tree: Join Operator @@ -375,18 +375,18 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((_col2 - _col1) > 600.0) and (_col1 = _col1)) (type: boolean) - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + predicate: (((_col2 - _col1) > 600.0) and _col1 is not null) (type: boolean) + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: double) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(), count(_col0) keys: _col1 (type: double) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -402,7 +402,7 @@ STAGE PLANS: key expressions: _col0 (type: double) sort order: + Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reduce Operator Tree: Group By Operator @@ -410,7 +410,7 @@ STAGE PLANS: keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 121 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -448,19 +448,19 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((_col2 - _col1) > 600.0) and (_col1 = _col1)) (type: boolean) - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + predicate: (((_col2 - _col1) > 600.0) and _col1 is not null) (type: boolean) + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: double) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: double), true (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: diff --git a/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out b/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out index 5c306f6b47..79b7d83619 100644 --- a/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out +++ b/ql/src/test/results/clientpositive/subquery_unqualcolumnrefs.q.out @@ -49,35 +49,38 @@ STAGE PLANS: TableScan alias: src11 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: key1 (type: string), value1 (type: string) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: ((key1 > '9') and value1 is not null) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Select Operator + expressions: key1 (type: string), value1 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '9') and (value = value)) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + predicate: ((key > '9') and value is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -86,10 +89,10 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -116,35 +119,38 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > '9') and value is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '9') and (value = value)) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + predicate: ((key > '9') and value is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -153,10 +159,10 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -187,45 +193,72 @@ where b.key in ) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan - alias: b + alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > '9') and value is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: TableScan - alias: src + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '9') and (value = value)) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + predicate: ((key > '9') and value is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -234,10 +267,10 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -284,20 +317,20 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_mfgr = p_mfgr) (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + predicate: p_mfgr is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: p_mfgr (type: string), p_size (type: int) sort order: ++ Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: p_name (type: string) Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition @@ -318,20 +351,20 @@ STAGE PLANS: window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (rank_window_0 <= 2) (type: boolean) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(), count(_col0) keys: _col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -347,7 +380,7 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reduce Operator Tree: Group By Operator @@ -355,7 +388,7 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -384,7 +417,7 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reduce Operator Tree: Join Operator @@ -417,7 +450,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: boolean) Reduce Operator Tree: Join Operator @@ -450,20 +483,20 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_mfgr = p_mfgr) (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + predicate: p_mfgr is not null (type: boolean) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: p_mfgr (type: string), p_size (type: int) sort order: ++ Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: p_name (type: string) Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition @@ -484,19 +517,19 @@ STAGE PLANS: window function: GenericUDAFRankEvaluator window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (rank_window_0 <= 2) (type: boolean) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -512,20 +545,20 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), true (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: diff --git a/ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out b/ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out deleted file mode 100644 index ddea584990..0000000000 --- a/ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out +++ /dev/null @@ -1,444 +0,0 @@ -PREHOOK: query: explain vectorization expression -select p.p_partkey, li.l_suppkey -from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey -where li.l_linenumber = 1 and - li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR') -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression -select p.p_partkey, li.l_suppkey -from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey -where li.l_linenumber = 1 and - li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR') -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-8 depends on stages: Stage-1 - Stage-6 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-6 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: lineitem - Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNotNull(col 1:int) - predicate: l_partkey is not null (type: boolean) - Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 1:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: l_partkey (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:li - Fetch Operator - limit: -1 - $hdt$_2:lineitem - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:li - TableScan - alias: li - Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((l_linenumber = 1) and l_orderkey is not null and l_partkey is not null) (type: boolean) - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col1 (type: int) - $hdt$_2:lineitem - TableScan - alias: lineitem - Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((l_shipmode = 'AIR') and l_orderkey is not null) (type: boolean) - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: l_orderkey (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - - Stage: Stage-6 - Map Reduce - Map Operator Tree: - TableScan - TableScan Vectorization: - native: true - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col1 (type: int) - Map Join Vectorization: - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - Map Join Vectorization: - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - outputColumnNames: _col0, _col3 - Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - Local Work: - Map Reduce Local Work - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select p.p_partkey, li.l_suppkey -from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey -where li.l_linenumber = 1 and - li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR') -PREHOOK: type: QUERY -PREHOOK: Input: default@lineitem -#### A masked pattern was here #### -POSTHOOK: query: select p.p_partkey, li.l_suppkey -from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey -where li.l_linenumber = 1 and - li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR') -POSTHOOK: type: QUERY -POSTHOOK: Input: default@lineitem -#### A masked pattern was here #### -108570 8571 -115118 7630 -115209 7721 -155190 7706 -2320 9821 -40216 217 -4297 1798 -61336 8855 -64128 9141 -82704 7721 -PREHOOK: query: explain vectorization expression -select p.p_partkey, li.l_suppkey -from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey -where li.l_linenumber = 1 and - li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression -select p.p_partkey, li.l_suppkey -from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey -where li.l_linenumber = 1 and - li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) -POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-8 depends on stages: Stage-1 - Stage-6 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-6 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: lineitem - Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNotNull(col 1:int) - predicate: l_partkey is not null (type: boolean) - Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 1:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: l_partkey (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - Reduce Vectorization: - enabled: false - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true - enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:li - Fetch Operator - limit: -1 - $hdt$_2:lineitem - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:li - TableScan - alias: li - Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((l_linenumber = 1) and l_partkey is not null) (type: boolean) - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: l_orderkey (type: int), l_partkey (type: int), l_suppkey (type: int), 1 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col1 (type: int) - $hdt$_2:lineitem - TableScan - alias: lineitem - Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((l_linenumber = l_linenumber) and (l_shipmode = 'AIR')) (type: boolean) - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: l_orderkey (type: int), l_linenumber (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col1 (type: int), _col4 (type: int) - 1 _col0 (type: int), _col1 (type: int) - - Stage: Stage-6 - Map Reduce - Map Operator Tree: - TableScan - TableScan Vectorization: - native: true - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col1 (type: int) - Map Join Vectorization: - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 55 Data size: 6598 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col1 (type: int), _col4 (type: int) - 1 _col0 (type: int), _col1 (type: int) - Map Join Vectorization: - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - outputColumnNames: _col0, _col3 - Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 60 Data size: 7257 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - Local Work: - Map Reduce Local Work - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select p.p_partkey, li.l_suppkey -from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey -where li.l_linenumber = 1 and - li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) -PREHOOK: type: QUERY -PREHOOK: Input: default@lineitem -#### A masked pattern was here #### -POSTHOOK: query: select p.p_partkey, li.l_suppkey -from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey -where li.l_linenumber = 1 and - li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@lineitem -#### A masked pattern was here #### -108570 8571 -4297 1798