diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java index c95580488b..e99e6d3fe1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java @@ -17,12 +17,14 @@ */ package org.apache.hadoop.hive.ql.optimizer.calcite; +import com.google.common.collect.HashMultimap; import com.google.common.collect.Multimap; import com.google.common.collect.Sets; import java.util.AbstractList; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; +import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; @@ -64,6 +66,7 @@ import org.apache.calcite.tools.RelBuilder; import org.apache.calcite.util.ImmutableBitSet; import org.apache.calcite.util.Pair; +import org.apache.commons.lang3.tuple.Triple; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; @@ -538,22 +541,213 @@ public static boolean isRowFilteringPlan(final RelMetadataQuery mq, RelNode oper return false; } - public static Pair> isRewritablePKFKJoin(RelBuilder builder, Join join, + /** + * Returns a triple where first value represents whether we could extract a FK-PK join + * or not, the second value is a pair with the column from left and right input that + * are used for the FK-PK join, and the third value are the predicates that are not + * part of the FK-PK condition. Currently we can only extract one FK-PK join. + */ + public static PKFKJoinInfo extractPKFKJoin( + Join join, List joinFilters, boolean leftInputPotentialFK, RelMetadataQuery mq) { + final List residualPreds = new ArrayList<>(); + final JoinRelType joinType = join.getJoinType(); + final RelNode fkInput = leftInputPotentialFK ? join.getLeft() : join.getRight(); + final PKFKJoinInfo cannotExtract = + PKFKJoinInfo.of(false, null, null); + + if (joinType != JoinRelType.INNER) { + // If it is not an inner, we transform it as the metadata + // providers for expressions do not pull information through + // outer join (as it would not be correct) + join = join.copy(join.getTraitSet(), join.getCluster().getRexBuilder().makeLiteral(true), + join.getLeft(), join.getRight(), JoinRelType.INNER, false); + } + + // 1) Gather all tables from the FK side and the table from the + // non-FK side + final Set leftTables = mq.getTableReferences(join.getLeft()); + final Set rightTables = + Sets.difference(mq.getTableReferences(join), mq.getTableReferences(join.getLeft())); + final Set fkTables = join.getLeft() == fkInput ? leftTables : rightTables; + final Set nonFkTables = join.getLeft() == fkInput ? rightTables : leftTables; + + // 2) Check whether there is a FK relationship + Set candidatePredicates = new HashSet<>(); + EquivalenceClasses ec = new EquivalenceClasses(); + for (RexNode conj : joinFilters) { + if (!conj.isA(SqlKind.EQUALS)) { + // Not an equality, continue + residualPreds.add(conj); + continue; + } + RexCall equiCond = (RexCall) conj; + RexNode eqOp1 = equiCond.getOperands().get(0); + if (!RexUtil.isReferenceOrAccess(eqOp1, true)) { + // Ignore + residualPreds.add(conj); + continue; + } + Set eqOp1ExprsLineage = mq.getExpressionLineage(join, eqOp1); + if (eqOp1ExprsLineage == null) { + // Cannot be mapped, continue + residualPreds.add(conj); + continue; + } + RexNode eqOp2 = equiCond.getOperands().get(1); + if (!RexUtil.isReferenceOrAccess(eqOp2, true)) { + // Ignore + residualPreds.add(conj); + continue; + } + Set eqOp2ExprsLineage = mq.getExpressionLineage(join, eqOp2); + if (eqOp2ExprsLineage == null) { + // Cannot be mapped, continue + residualPreds.add(conj); + continue; + } + List eqOp2ExprsFiltered = null; + for (RexNode eqOpExprLineage1 : eqOp1ExprsLineage) { + RexTableInputRef inputRef1 = extractTableInputRef(eqOpExprLineage1); + if (inputRef1 == null) { + // This condition could not be map into an input reference + continue; + } + if (eqOp2ExprsFiltered == null) { + // First iteration + eqOp2ExprsFiltered = new ArrayList<>(); + for (RexNode eqOpExprLineage2 : eqOp2ExprsLineage) { + RexTableInputRef inputRef2 = extractTableInputRef(eqOpExprLineage2); + if (inputRef2 == null) { + // Bail out as this condition could not be map into an input reference + continue; + } + // Add to list of expressions for follow-up iterations + eqOp2ExprsFiltered.add(inputRef2); + // Add to equivalence classes and backwards mapping + ec.addEquivalence(inputRef1, inputRef2, equiCond); + candidatePredicates.add(equiCond); + } + } else { + // Rest of iterations, only adding, no checking + for (RexTableInputRef inputRef2 : eqOp2ExprsFiltered) { + ec.addEquivalence(inputRef1, inputRef2, equiCond); + } + } + } + if (!candidatePredicates.contains(conj)) { + // We add it to residual already + residualPreds.add(conj); + } + } + if (ec.getEquivalenceClassesMap().isEmpty()) { + // This may be a cartesian product, we bail out + return cannotExtract; + } + + // 4) For each table, check whether there is a matching on the non-FK side. + // If there is and it is the only condition, we are ready to transform + for (final RelTableRef nonFkTable : nonFkTables) { + final List nonFkTableQName = nonFkTable.getQualifiedName(); + for (RelTableRef tRef : fkTables) { + List constraints = tRef.getTable().getReferentialConstraints(); + for (RelReferentialConstraint constraint : constraints) { + if (constraint.getTargetQualifiedName().equals(nonFkTableQName)) { + EquivalenceClasses ecT = EquivalenceClasses.copy(ec); + Set removedOriginalPredicates = new HashSet<>(); + ImmutableBitSet.Builder lBitSet = ImmutableBitSet.builder(); + ImmutableBitSet.Builder rBitSet = ImmutableBitSet.builder(); + boolean allContained = true; + for (int pos = 0; pos < constraint.getNumColumns(); pos++) { + int foreignKeyPos = constraint.getColumnPairs().get(pos).source; + RelDataType foreignKeyColumnType = + tRef.getTable().getRowType().getFieldList().get(foreignKeyPos).getType(); + RexTableInputRef foreignKeyColumnRef = + RexTableInputRef.of(tRef, foreignKeyPos, foreignKeyColumnType); + int uniqueKeyPos = constraint.getColumnPairs().get(pos).target; + RexTableInputRef uniqueKeyColumnRef = RexTableInputRef.of(nonFkTable, uniqueKeyPos, + nonFkTable.getTable().getRowType().getFieldList().get(uniqueKeyPos).getType()); + if (ecT.getEquivalenceClassesMap().containsKey(uniqueKeyColumnRef) && + ecT.getEquivalenceClassesMap().get(uniqueKeyColumnRef).contains(foreignKeyColumnRef)) { + // Remove this condition from eq classes as we have checked that it is present + // in the join condition. In turn, populate the columns that are referenced + // from the join inputs + for (RexCall originalPred : ecT.removeEquivalence(uniqueKeyColumnRef, foreignKeyColumnRef)) { + ImmutableBitSet leftCols = RelOptUtil.InputFinder.bits(originalPred.getOperands().get(0)); + ImmutableBitSet rightCols = RelOptUtil.InputFinder.bits(originalPred.getOperands().get(1)); + // Get length and flip column references if join condition specified in + // reverse order to join sources + int nFieldsLeft = join.getLeft().getRowType().getFieldList().size(); + int nFieldsRight = join.getRight().getRowType().getFieldList().size(); + int nSysFields = join.getSystemFieldList().size(); + ImmutableBitSet rightFieldsBitSet = ImmutableBitSet.range(nSysFields + nFieldsLeft, + nSysFields + nFieldsLeft + nFieldsRight); + if (rightFieldsBitSet.contains(leftCols)) { + ImmutableBitSet t = leftCols; + leftCols = rightCols; + rightCols = t; + } + lBitSet.set(leftCols.nextSetBit(0) - nSysFields); + rBitSet.set(rightCols.nextSetBit(0) - (nSysFields + nFieldsLeft)); + removedOriginalPredicates.add(originalPred); + } + } else { + // No relationship, we cannot do anything + allContained = false; + break; + } + } + if (allContained) { + // This is a PK-FK, reassign equivalence classes and remove conditions + // TODO: Support inference of multiple PK-FK relationships + + // 4.1) Add to residual whatever is remaining + candidatePredicates.removeAll(removedOriginalPredicates); + residualPreds.addAll(candidatePredicates); + // 4.2) Return result + return PKFKJoinInfo.of(true, Pair.of(lBitSet.build(), rBitSet.build()), residualPreds); + } + } + } + } + } + + return cannotExtract; + } + + public static class PKFKJoinInfo { + public final boolean isPkFkJoin; + public final Pair pkFkJoinColumns; + public final List additionalPredicates; + + private PKFKJoinInfo(boolean isPkFkJoin, Pair pkFkJoinColumns, + List additionalPredicates) { + this.isPkFkJoin = isPkFkJoin; + this.pkFkJoinColumns = pkFkJoinColumns; + this.additionalPredicates = additionalPredicates == null ? null : + ImmutableList.copyOf(additionalPredicates); + } + + public static PKFKJoinInfo of(boolean isPkFkJoin, Pair pkFkJoinColumns, + List additionalPredicates) { + return new PKFKJoinInfo(isPkFkJoin, pkFkJoinColumns, additionalPredicates); + } + } + + public static RewritablePKFKJoinInfo isRewritablePKFKJoin(Join join, boolean leftInputPotentialFK, RelMetadataQuery mq) { final JoinRelType joinType = join.getJoinType(); final RexNode cond = join.getCondition(); final RelNode fkInput = leftInputPotentialFK ? join.getLeft() : join.getRight(); final RelNode nonFkInput = leftInputPotentialFK ? join.getRight() : join.getLeft(); - final Pair> nonRewritable = Pair.of(false, null); + final RewritablePKFKJoinInfo nonRewritable = RewritablePKFKJoinInfo.of(false, null); if (joinType != JoinRelType.INNER) { // If it is not an inner, we transform it as the metadata // providers for expressions do not pull information through // outer join (as it would not be correct) - join = (Join) builder - .push(join.getLeft()).push(join.getRight()) - .join(JoinRelType.INNER, cond) - .build(); + join = join.copy(join.getTraitSet(), cond, + join.getLeft(), join.getRight(), JoinRelType.INNER, + false); } // 1) Check whether there is any filtering condition on the @@ -604,13 +798,13 @@ public static boolean isRowFilteringPlan(final RelMetadataQuery mq, RelNode oper // Add to list of expressions for follow-up iterations eqOp2ExprsFiltered.add(inputRef2); // Add to equivalence classes and backwards mapping - ec.addEquivalenceClass(inputRef1, inputRef2); + ec.addEquivalence(inputRef1, inputRef2); refToRex.put(inputRef2, eqOp2); } } else { // Rest of iterations, only adding, no checking for (RexTableInputRef inputRef2 : eqOp2ExprsFiltered) { - ec.addEquivalenceClass(inputRef1, inputRef2); + ec.addEquivalence(inputRef1, inputRef2); } } } @@ -667,14 +861,7 @@ public static boolean isRowFilteringPlan(final RelMetadataQuery mq, RelNode oper } // Remove this condition from eq classes as we have checked that it is present // in the join condition - ecT.getEquivalenceClassesMap().get(uniqueKeyColumnRef).remove(foreignKeyColumnRef); - if (ecT.getEquivalenceClassesMap().get(uniqueKeyColumnRef).size() == 1) { // self - ecT.getEquivalenceClassesMap().remove(uniqueKeyColumnRef); - } - ecT.getEquivalenceClassesMap().get(foreignKeyColumnRef).remove(uniqueKeyColumnRef); - if (ecT.getEquivalenceClassesMap().get(foreignKeyColumnRef).size() == 1) { // self - ecT.getEquivalenceClassesMap().remove(foreignKeyColumnRef); - } + ecT.removeEquivalence(uniqueKeyColumnRef, foreignKeyColumnRef); } else { // No relationship, we cannot do anything allContained = false; @@ -690,7 +877,22 @@ public static boolean isRowFilteringPlan(final RelMetadataQuery mq, RelNode oper } } - return Pair.of(canBeRewritten, nullableNodes); + return RewritablePKFKJoinInfo.of(canBeRewritten, nullableNodes); + } + + public static class RewritablePKFKJoinInfo { + public final boolean rewritable; + public final List nullableNodes; + + private RewritablePKFKJoinInfo(boolean rewritable, List nullableNodes) { + this.rewritable = rewritable; + this.nullableNodes = nullableNodes == null ? null : + ImmutableList.copyOf(nullableNodes); + } + + public static RewritablePKFKJoinInfo of(boolean rewritable, List nullableNodes) { + return new RewritablePKFKJoinInfo(rewritable, nullableNodes); + } } private static RexTableInputRef extractTableInputRef(RexNode node) { @@ -713,13 +915,23 @@ private static RexTableInputRef extractTableInputRef(RexNode node) { */ private static class EquivalenceClasses { + // Contains the node to equivalence class nodes private final Map> nodeToEquivalenceClass; + // Contains the pair of equivalences to original expression that they originate from + private final Multimap, RexCall> equivalenceToOriginalNode; protected EquivalenceClasses() { nodeToEquivalenceClass = new HashMap<>(); + equivalenceToOriginalNode = HashMultimap.create(); + } + + protected void addEquivalence(RexTableInputRef p1, RexTableInputRef p2, RexCall originalCond) { + addEquivalence(p1, p2); + equivalenceToOriginalNode.put(Pair.of(p1, p2), originalCond); + equivalenceToOriginalNode.put(Pair.of(p2, p1), originalCond); } - protected void addEquivalenceClass(RexTableInputRef p1, RexTableInputRef p2) { + protected void addEquivalence(RexTableInputRef p1, RexTableInputRef p2) { Set c1 = nodeToEquivalenceClass.get(p1); Set c2 = nodeToEquivalenceClass.get(p2); if (c1 != null && c2 != null) { @@ -756,11 +968,30 @@ protected void addEquivalenceClass(RexTableInputRef p1, RexTableInputRef p2) { return nodeToEquivalenceClass; } + // Returns the original nodes that the equivalences were generated from + protected Set removeEquivalence(RexTableInputRef p1, RexTableInputRef p2) { + nodeToEquivalenceClass.get(p1).remove(p2); + if (nodeToEquivalenceClass.get(p1).size() == 1) { // self + nodeToEquivalenceClass.remove(p1); + } + nodeToEquivalenceClass.get(p2).remove(p1); + if (nodeToEquivalenceClass.get(p2).size() == 1) { // self + nodeToEquivalenceClass.remove(p2); + } + Set originalNodes = new HashSet<>(); + originalNodes.addAll(equivalenceToOriginalNode.removeAll(Pair.of(p1, p2))); + originalNodes.addAll(equivalenceToOriginalNode.removeAll(Pair.of(p2, p1))); + return originalNodes; + } + protected static EquivalenceClasses copy(EquivalenceClasses ec) { final EquivalenceClasses newEc = new EquivalenceClasses(); for (Entry> e : ec.nodeToEquivalenceClass.entrySet()) { newEc.nodeToEquivalenceClass.put(e.getKey(), Sets.newLinkedHashSet(e.getValue())); } + for (Entry, Collection> e : ec.equivalenceToOriginalNode.asMap().entrySet()) { + newEc.equivalenceToOriginalNode.putAll(e.getKey(), e.getValue()); + } return newEc; } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java index 534a5c9531..c735df81eb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java @@ -59,6 +59,7 @@ import org.apache.calcite.util.mapping.Mappings; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil.RewritablePKFKJoinInfo; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -220,12 +221,12 @@ public void onMatch(RelOptRuleCall call) { } // 2) Check whether this join can be rewritten or removed - Pair> r = HiveRelOptUtil.isRewritablePKFKJoin(call.builder(), + RewritablePKFKJoinInfo r = HiveRelOptUtil.isRewritablePKFKJoin( join, leftInput == fkInput, call.getMetadataQuery()); // 3) If it is the only condition, we can trigger the rewriting - if (r.left) { - List nullableNodes = r.right; + if (r.rewritable) { + List nullableNodes = r.nullableNodes; // If we reach here, we trigger the transform if (mode == Mode.REMOVE) { if (rightInputPotentialFK) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java index 576ed34bf3..563260e415 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java @@ -34,6 +34,7 @@ import org.apache.calcite.rel.core.TableScan; import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; import org.apache.calcite.rel.metadata.RelMdRowCount; +import org.apache.calcite.rel.metadata.RelMdUtil; import org.apache.calcite.rel.metadata.RelMetadataProvider; import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rex.RexBuilder; @@ -46,6 +47,8 @@ import org.apache.calcite.util.BuiltInMethod; import org.apache.calcite.util.ImmutableBitSet; import org.apache.calcite.util.Pair; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil.PKFKJoinInfo; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; @@ -63,24 +66,48 @@ protected HiveRelMdRowCount() { } public Double getRowCount(Join join, RelMetadataQuery mq) { - PKFKRelationInfo pkfk = analyzeJoinForPKFK(join, mq); + // Try to infer from constraints first + final Pair constraintBasedResult = + constraintsBasedAnalyzeJoinForPKFK(join, mq); + if (constraintBasedResult != null) { + // We succeeded, we calculate the selectivity based on the inferred information + // and any residual predicate + double joinSelectivity = Math.min(1.0, + constraintBasedResult.left.pkInfo.selectivity * constraintBasedResult.left.ndvScalingFactor); + double residualSelectivity = RelMdUtil.guessSelectivity(constraintBasedResult.right); + double rowCount = constraintBasedResult.left.fkInfo.rowCount * joinSelectivity * residualSelectivity; + if (LOG.isDebugEnabled()) { + LOG.debug("Identified Primary - Foreign Key relation from constraints:\n {} {} Row count for join: {}\n" + + " Join selectivity: {}\n Residual selectivity: {}\n", RelOptUtil.toString(join), constraintBasedResult.left, + rowCount, joinSelectivity, residualSelectivity); + } + return rowCount; + } + // Otherwise, try to infer from stats + final PKFKRelationInfo pkfk = analyzeJoinForPKFK(join, mq); if (pkfk != null) { - double selectivity = (pkfk.pkInfo.selectivity * pkfk.ndvScalingFactor); + double selectivity = pkfk.pkInfo.selectivity * pkfk.ndvScalingFactor; selectivity = Math.min(1.0, selectivity); if (LOG.isDebugEnabled()) { - LOG.debug("Identified Primary - Foreign Key relation: {} {}",RelOptUtil.toString(join), pkfk); + LOG.debug("Identified Primary - Foreign Key relation: {} {}", RelOptUtil.toString(join), pkfk); } return pkfk.fkInfo.rowCount * selectivity; } + // If we cannot infer anything, then we just go to join.estimateRowCount(mq). // Do not call mq.getRowCount(join), will trigger CyclicMetadataException - return join.estimateRowCount(mq); + final Double rowCount = join.estimateRowCount(mq); + if (LOG.isDebugEnabled()) { + LOG.debug("No Primary - Foreign Key relation: \n{} Row count for join: {}\n", + RelOptUtil.toString(join), rowCount); + } + return rowCount; } @Override public Double getRowCount(SemiJoin rel, RelMetadataQuery mq) { PKFKRelationInfo pkfk = analyzeJoinForPKFK(rel, mq); if (pkfk != null) { - double selectivity = (pkfk.pkInfo.selectivity * pkfk.ndvScalingFactor); + double selectivity = pkfk.pkInfo.selectivity * pkfk.ndvScalingFactor; selectivity = Math.min(1.0, selectivity); if (LOG.isDebugEnabled()) { LOG.debug("Identified Primary - Foreign Key relation: {} {}", RelOptUtil.toString(rel), pkfk); @@ -217,10 +244,10 @@ public static PKFKRelationInfo analyzeJoinForPKFK(Join joinRel, RelMetadataQuery int rightColIdx = joinCols.right; RexBuilder rexBuilder = joinRel.getCluster().getRexBuilder(); - RexNode leftPred = RexUtil - .composeConjunction(rexBuilder, leftFilters, true); - RexNode rightPred = RexUtil.composeConjunction(rexBuilder, rightFilters, - true); + RexNode leftPred = RexUtil.composeConjunction( + rexBuilder, leftFilters, true); + RexNode rightPred = RexUtil.composeConjunction( + rexBuilder, rightFilters, true); ImmutableBitSet lBitSet = ImmutableBitSet.of(leftColIdx); ImmutableBitSet rBitSet = ImmutableBitSet.of(rightColIdx); @@ -228,11 +255,10 @@ public static PKFKRelationInfo analyzeJoinForPKFK(Join joinRel, RelMetadataQuery * If the form is Dim loj F or Fact roj Dim or Dim semij Fact then return * null. */ - boolean leftIsKey = (joinRel.getJoinType() == JoinRelType.INNER || joinRel - .getJoinType() == JoinRelType.RIGHT) - && !(joinRel instanceof SemiJoin) && isKey(lBitSet, left, mq); - boolean rightIsKey = (joinRel.getJoinType() == JoinRelType.INNER || joinRel - .getJoinType() == JoinRelType.LEFT) && isKey(rBitSet, right, mq); + boolean leftIsKey = (joinRel.getJoinType() == JoinRelType.INNER || joinRel.getJoinType() == JoinRelType.RIGHT) + && isKey(lBitSet, left, mq); + boolean rightIsKey = (joinRel.getJoinType() == JoinRelType.INNER || joinRel.getJoinType() == JoinRelType.LEFT) + && isKey(rBitSet, right, mq); if (!leftIsKey && !rightIsKey) { return null; @@ -247,41 +273,37 @@ public static PKFKRelationInfo analyzeJoinForPKFK(Join joinRel, RelMetadataQuery } } - int pkSide = leftIsKey ? 0 : rightIsKey ? 1 : -1; - - boolean isPKSideSimpleTree = pkSide != -1 ? - IsSimpleTreeOnJoinKey.check( - pkSide == 0 ? left : right, - pkSide == 0 ? leftColIdx : rightColIdx, mq) : false; - - double leftNDV = isPKSideSimpleTree ? mq.getDistinctRowCount(left, lBitSet, leftPred) : -1; - double rightNDV = isPKSideSimpleTree ? mq.getDistinctRowCount(right, rBitSet, rightPred) : -1; - - /* - * If the ndv of the PK - FK side don't match, and the PK side is a filter - * on the Key column then scale the NDV on the FK side. - * - * As described by Peter Boncz: http://databasearchitects.blogspot.com/ - * in such cases we can be off by a large margin in the Join cardinality - * estimate. The e.g. he provides is on the join of StoreSales and DateDim - * on the TPCDS dataset. Since the DateDim is populated for 20 years into - * the future, while the StoreSales only has 5 years worth of data, there - * are 40 times fewer distinct dates in StoreSales. - * - * In general it is hard to infer the range for the foreign key on an - * arbitrary expression. For e.g. the NDV for DayofWeek is the same - * irrespective of NDV on the number of unique days, whereas the - * NDV of Quarters has the same ratio as the NDV on the keys. - * - * But for expressions that apply only on columns that have the same NDV - * as the key (implying that they are alternate keys) we can apply the - * ratio. So in the case of StoreSales - DateDim joins for predicate on the - * d_date column we can apply the scaling factor. - */ - double ndvScalingFactor = 1.0; - if ( isPKSideSimpleTree ) { - ndvScalingFactor = pkSide == 0 ? leftNDV/rightNDV : rightNDV / leftNDV; - } + int pkSide = leftIsKey ? 0 : 1; + boolean isPKSideSimpleTree = leftIsKey ? SimpleTreeOnJoinKey.check(false, left, lBitSet, mq) : + SimpleTreeOnJoinKey.check(false, right, rBitSet, mq); + double leftNDV = isPKSideSimpleTree ? mq.getDistinctRowCount(left, lBitSet, leftPred) : -1; + double rightNDV = isPKSideSimpleTree ? mq.getDistinctRowCount(right, rBitSet, rightPred) : -1; + + /* + * If the ndv of the PK - FK side don't match, and the PK side is a filter + * on the Key column then scale the NDV on the FK side. + * + * As described by Peter Boncz: http://databasearchitects.blogspot.com/ + * in such cases we can be off by a large margin in the Join cardinality + * estimate. The e.g. he provides is on the join of StoreSales and DateDim + * on the TPCDS dataset. Since the DateDim is populated for 20 years into + * the future, while the StoreSales only has 5 years worth of data, there + * are 40 times fewer distinct dates in StoreSales. + * + * In general it is hard to infer the range for the foreign key on an + * arbitrary expression. For e.g. the NDV for DayofWeek is the same + * irrespective of NDV on the number of unique days, whereas the + * NDV of Quarters has the same ratio as the NDV on the keys. + * + * But for expressions that apply only on columns that have the same NDV + * as the key (implying that they are alternate keys) we can apply the + * ratio. So in the case of StoreSales - DateDim joins for predicate on the + * d_date column we can apply the scaling factor. + */ + double ndvScalingFactor = 1.0; + if ( isPKSideSimpleTree ) { + ndvScalingFactor = pkSide == 0 ? leftNDV/rightNDV : rightNDV / leftNDV; + } if (pkSide == 0) { FKSideInfo fkInfo = new FKSideInfo(rightRowCount, @@ -293,9 +315,7 @@ public static PKFKRelationInfo analyzeJoinForPKFK(Join joinRel, RelMetadataQuery pkSelectivity); return new PKFKRelationInfo(1, fkInfo, pkInfo, ndvScalingFactor, isPKSideSimpleTree); - } - - if (pkSide == 1) { + } else { // pkSide == 1 FKSideInfo fkInfo = new FKSideInfo(leftRowCount, leftNDV); double pkSelectivity = pkSelectivity(joinRel, mq, false, right, rightRowCount); @@ -304,10 +324,114 @@ public static PKFKRelationInfo analyzeJoinForPKFK(Join joinRel, RelMetadataQuery joinRel.getJoinType().generatesNullsOnLeft() ? 1.0 : pkSelectivity); - return new PKFKRelationInfo(1, fkInfo, pkInfo, ndvScalingFactor, isPKSideSimpleTree); + return new PKFKRelationInfo(0, fkInfo, pkInfo, ndvScalingFactor, isPKSideSimpleTree); + } + } + + /* + * + */ + public static Pair constraintsBasedAnalyzeJoinForPKFK(Join join, RelMetadataQuery mq) { + + if (join instanceof SemiJoin) { + // TODO: Support semijoin + return null; + } + + final RelNode left = join.getInputs().get(0); + final RelNode right = join.getInputs().get(1); + + // 1) Split filters in conjuncts + final List condConjs = RelOptUtil.conjunctions( + join.getCondition()); + + if (condConjs.isEmpty()) { + // Bail out + return null; } - return null; + // 2) Classify filters depending on their provenance + final List joinFilters = new ArrayList<>(condConjs); + final List leftFilters = new ArrayList<>(); + final List rightFilters = new ArrayList<>(); + RelOptUtil.classifyFilters(join, joinFilters, join.getJoinType(),false, + !join.getJoinType().generatesNullsOnRight(), !join.getJoinType().generatesNullsOnLeft(), + joinFilters, leftFilters, rightFilters); + + // 3) Check if we are joining on PK-FK + final PKFKJoinInfo leftInputResult = + HiveRelOptUtil.extractPKFKJoin(join, joinFilters, false, mq); + final PKFKJoinInfo rightInputResult = + HiveRelOptUtil.extractPKFKJoin(join, joinFilters, true, mq); + if (leftInputResult == null && rightInputResult == null) { + // Nothing to do here, bail out + return null; + } + + boolean leftIsKey = (join.getJoinType() == JoinRelType.INNER || join.getJoinType() == JoinRelType.RIGHT) + && leftInputResult.isPkFkJoin; + boolean rightIsKey = (join.getJoinType() == JoinRelType.INNER || join.getJoinType() == JoinRelType.LEFT) + && rightInputResult.isPkFkJoin; + if (!leftIsKey && !rightIsKey) { + // Nothing to do here, bail out + return null; + } + final double leftRowCount = mq.getRowCount(left); + final double rightRowCount = mq.getRowCount(right); + if (leftIsKey && rightIsKey) { + if (rightRowCount < leftRowCount) { + leftIsKey = false; + } + } + final ImmutableBitSet lBitSet = leftIsKey ? leftInputResult.pkFkJoinColumns.left : rightInputResult.pkFkJoinColumns.left; + final ImmutableBitSet rBitSet = leftIsKey ? leftInputResult.pkFkJoinColumns.right : rightInputResult.pkFkJoinColumns.right; + final List residualFilters = leftIsKey ? leftInputResult.additionalPredicates : rightInputResult.additionalPredicates; + + // 4) Extract additional information on the PK-FK relationship + int pkSide = leftIsKey ? 0 : 1; + boolean isPKSideSimpleTree = leftIsKey ? SimpleTreeOnJoinKey.check(true, left, lBitSet, mq) : + SimpleTreeOnJoinKey.check(true, right, rBitSet, mq); + RexBuilder rexBuilder = join.getCluster().getRexBuilder(); + RexNode leftPred = RexUtil.composeConjunction( + rexBuilder, leftFilters, true); + RexNode rightPred = RexUtil.composeConjunction( + rexBuilder, rightFilters, true); + double leftNDV = isPKSideSimpleTree ? mq.getDistinctRowCount(left, lBitSet, leftPred) : -1; + double rightNDV = isPKSideSimpleTree ? mq.getDistinctRowCount(right, rBitSet, rightPred) : -1; + + // 5) Add the rest of operators back to the join filters + // and create residual condition + RexNode residualCond = residualFilters.isEmpty() ? null : + residualFilters.size() == 1 ? residualFilters.get(0) : + rexBuilder.makeCall(SqlStdOperatorTable.AND, residualFilters); + + // 6) Return result + if (pkSide == 0) { + FKSideInfo fkInfo = new FKSideInfo(rightRowCount, + rightNDV); + double pkSelectivity = pkSelectivity(join, mq, true, left, leftRowCount); + PKSideInfo pkInfo = new PKSideInfo(leftRowCount, + leftNDV, + join.getJoinType().generatesNullsOnRight() ? 1.0 : + pkSelectivity); + double ndvScalingFactor = isPKSideSimpleTree ? leftNDV/rightNDV : 1.0; + if (isPKSideSimpleTree) { + ndvScalingFactor = leftNDV/rightNDV; + } + return Pair.of(new PKFKRelationInfo(1, fkInfo, pkInfo, ndvScalingFactor, isPKSideSimpleTree), + residualCond); + } else { // pkSide == 1 + FKSideInfo fkInfo = new FKSideInfo(leftRowCount, + leftNDV); + double pkSelectivity = pkSelectivity(join, mq, false, right, rightRowCount); + PKSideInfo pkInfo = new PKSideInfo(rightRowCount, + rightNDV, + join.getJoinType().generatesNullsOnLeft() ? 1.0 : + pkSelectivity); + double ndvScalingFactor = isPKSideSimpleTree ? rightNDV/leftNDV : 1.0; + return Pair.of(new PKFKRelationInfo(0, fkInfo, pkInfo, ndvScalingFactor, isPKSideSimpleTree), + residualCond); + } } private static double pkSelectivity(Join joinRel, RelMetadataQuery mq, boolean leftChild, @@ -402,20 +526,22 @@ private static boolean isKey(ImmutableBitSet c, RelNode rel, RelMetadataQuery mq return new Pair(leftColIdx, rightColIdx); } - private static class IsSimpleTreeOnJoinKey extends RelVisitor { + private static class SimpleTreeOnJoinKey extends RelVisitor { - int joinKey; + boolean constraintsBased; + ImmutableBitSet joinKey; boolean simpleTree; RelMetadataQuery mq; - static boolean check(RelNode r, int joinKey, RelMetadataQuery mq) { - IsSimpleTreeOnJoinKey v = new IsSimpleTreeOnJoinKey(joinKey, mq); + static boolean check(boolean constraintsBased, RelNode r, ImmutableBitSet joinKey, RelMetadataQuery mq) { + SimpleTreeOnJoinKey v = new SimpleTreeOnJoinKey(constraintsBased, joinKey, mq); v.go(r); return v.simpleTree; } - IsSimpleTreeOnJoinKey(int joinKey, RelMetadataQuery mq) { + SimpleTreeOnJoinKey(boolean constraintsBased, ImmutableBitSet joinKey, RelMetadataQuery mq) { super(); + this.constraintsBased = constraintsBased; this.joinKey = joinKey; this.mq = mq; simpleTree = true; @@ -444,16 +570,23 @@ public void visit(RelNode node, int ordinal, RelNode parent) { } private boolean isSimple(Project project) { - RexNode r = project.getProjects().get(joinKey); - if (r instanceof RexInputRef) { - joinKey = ((RexInputRef) r).getIndex(); - return true; + ImmutableBitSet.Builder b = ImmutableBitSet.builder(); + for (int pos : joinKey) { + RexNode r = project.getProjects().get(pos); + if (!(r instanceof RexInputRef)) { + return false; + } + b.set(((RexInputRef) r).getIndex()); } - return false; + joinKey = b.build(); + return true; } private boolean isSimple(Filter filter, RelMetadataQuery mq) { ImmutableBitSet condBits = RelOptUtil.InputFinder.bits(filter.getCondition()); + if (constraintsBased) { + return mq.areColumnsUnique(filter, condBits); + } return isKey(condBits, filter, mq); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java index 575902d78d..7e9208229a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java @@ -98,8 +98,7 @@ private Double computeInnerJoinSelectivity(Join j, RelMetadataQuery mq, RexNode } catch (CalciteSemanticException e) { throw new RuntimeException(e); } - ImmutableMap.Builder colStatMapBuilder = ImmutableMap - .builder(); + ImmutableMap.Builder colStatMapBuilder = ImmutableMap.builder(); ImmutableMap colStatMap; int rightOffSet = j.getLeft().getRowType().getFieldCount(); diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query17.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query17.q.out index d1005788d5..3d190e3b68 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query17.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query17.q.out @@ -104,36 +104,36 @@ CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ASC], fetch=[100]) HiveProject(i_item_id=[$0], i_item_desc=[$1], s_state=[$2], store_sales_quantitycount=[$3], store_sales_quantityave=[/(CAST($4):DOUBLE, $3)], store_sales_quantitystdev=[POWER(/(-($5, /(*($6, $6), $3)), CASE(=($3, 1), null, -($3, 1))), 0.5)], store_sales_quantitycov=[/(POWER(/(-($5, /(*($6, $6), $3)), CASE(=($3, 1), null, -($3, 1))), 0.5), /(CAST($4):DOUBLE, $3))], as_store_returns_quantitycount=[$7], as_store_returns_quantityave=[/(CAST($8):DOUBLE, $7)], as_store_returns_quantitystdev=[POWER(/(-($9, /(*($10, $10), $7)), CASE(=($7, 1), null, -($7, 1))), 0.5)], store_returns_quantitycov=[/(POWER(/(-($9, /(*($10, $10), $7)), CASE(=($7, 1), null, -($7, 1))), 0.5), /(CAST($8):DOUBLE, $7))], catalog_sales_quantitycount=[$11], catalog_sales_quantityave=[/(CAST($12):DOUBLE, $11)], catalog_sales_quantitystdev=[/(POWER(/(-($13, /(*($14, $14), $11)), CASE(=($11, 1), null, -($11, 1))), 0.5), /(CAST($12):DOUBLE, $11))], catalog_sales_quantitycov=[/(POWER(/(-($13, /(*($14, $14), $11)), CASE(=($11, 1), null, -($11, 1))), 0.5), /(CAST($12):DOUBLE, $11))]) HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)], agg#1=[sum($3)], agg#2=[sum($7)], agg#3=[sum($6)], agg#4=[count($4)], agg#5=[sum($4)], agg#6=[sum($9)], agg#7=[sum($8)], agg#8=[count($5)], agg#9=[sum($5)], agg#10=[sum($11)], agg#11=[sum($10)]) - HiveProject($f0=[$8], $f1=[$9], $f2=[$22], $f3=[$5], $f4=[$19], $f5=[$13], $f30=[CAST($5):DOUBLE], $f7=[*(CAST($5):DOUBLE, CAST($5):DOUBLE)], $f40=[CAST($19):DOUBLE], $f9=[*(CAST($19):DOUBLE, CAST($19):DOUBLE)], $f50=[CAST($13):DOUBLE], $f11=[*(CAST($13):DOUBLE, CAST($13):DOUBLE)]) - HiveJoin(condition=[=($21, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[AND(AND(=($2, $17), =($1, $16)), =($4, $18))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($7, $1)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_quantity=[$10]) - HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0), IS NOT NULL($7))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($15, _UTF-16LE'2000Q1')]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + HiveProject($f0=[$6], $f1=[$7], $f2=[$22], $f3=[$13], $f4=[$19], $f5=[$3], $f30=[CAST($13):DOUBLE], $f7=[*(CAST($13):DOUBLE, CAST($13):DOUBLE)], $f40=[CAST($19):DOUBLE], $f9=[*(CAST($19):DOUBLE, CAST($19):DOUBLE)], $f50=[CAST($3):DOUBLE], $f11=[*(CAST($3):DOUBLE, CAST($3):DOUBLE)]) + HiveJoin(condition=[AND(=($17, $1), =($16, $2))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_item_sk=[$15], cs_quantity=[$18]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[IN($15, _UTF-16LE'2000Q1', _UTF-16LE'2000Q2', _UTF-16LE'2000Q3')]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$2], ss_sold_date_sk=[$3], ss_item_sk=[$4], ss_customer_sk=[$5], ss_store_sk=[$6], ss_ticket_number=[$7], ss_quantity=[$8], d_date_sk=[$9], sr_returned_date_sk=[$10], sr_item_sk=[$11], sr_customer_sk=[$12], sr_ticket_number=[$13], sr_return_quantity=[$14], d_date_sk0=[$15], s_store_sk=[$16], s_state=[$17]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4]) HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$1], cs_item_sk=[$2], cs_quantity=[$3], d_date_sk=[$4], sr_returned_date_sk=[$5], sr_item_sk=[$6], sr_customer_sk=[$7], sr_ticket_number=[$8], sr_return_quantity=[$9], d_date_sk0=[$10]) - HiveJoin(condition=[AND(=($7, $1), =($6, $2))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_item_sk=[$15], cs_quantity=[$18]) - HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[IN($15, _UTF-16LE'2000Q1', _UTF-16LE'2000Q2', _UTF-16LE'2000Q3')]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) - HiveProject(sr_returned_date_sk=[$0], sr_item_sk=[$1], sr_customer_sk=[$2], sr_ticket_number=[$3], sr_return_quantity=[$4], d_date_sk=[$5]) - HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(sr_returned_date_sk=[$0], sr_item_sk=[$2], sr_customer_sk=[$3], sr_ticket_number=[$9], sr_return_quantity=[$10]) - HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) - HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveJoin(condition=[=($13, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(AND(=($2, $9), =($1, $8)), =($4, $10))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_quantity=[$10]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[IN($15, _UTF-16LE'2000Q1', _UTF-16LE'2000Q2', _UTF-16LE'2000Q3')]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) - HiveProject(s_store_sk=[$0], s_state=[$24]) - HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveFilter(condition=[=($15, _UTF-16LE'2000Q1')]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + HiveProject(sr_returned_date_sk=[$0], sr_item_sk=[$1], sr_customer_sk=[$2], sr_ticket_number=[$3], sr_return_quantity=[$4], d_date_sk=[$5]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_returned_date_sk=[$0], sr_item_sk=[$2], sr_customer_sk=[$3], sr_ticket_number=[$9], sr_return_quantity=[$10]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[IN($15, _UTF-16LE'2000Q1', _UTF-16LE'2000Q2', _UTF-16LE'2000Q3')]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) + HiveProject(s_store_sk=[$0], s_state=[$24]) + HiveTableScan(table=[[default, store]], table:alias=[store]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query24.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query24.q.out index 0801f34472..1be393206a 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query24.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query24.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[298][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[298][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 7' is a cross product PREHOOK: query: explain cbo with ssales as (select c_last_name @@ -117,20 +117,44 @@ CBO PLAN: HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3]) HiveJoin(condition=[>($3, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_last_name=[$1], c_first_name=[$0], s_store_name=[$2], $f3=[$3]) - HiveAggregate(group=[{4, 5, 7}], agg#0=[sum($9)]) - HiveProject(i_current_price=[$0], i_size=[$1], i_units=[$2], i_manager_id=[$3], c_first_name=[$4], c_last_name=[$5], ca_state=[$6], s_store_name=[$7], s_state=[$8], $f9=[$9]) - HiveAggregate(group=[{8, 9, 10, 11, 14, 15, 18, 22, 23}], agg#0=[sum($6)]) - HiveJoin(condition=[AND(=($5, $1), =($2, $0))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveAggregate(group=[{0, 1, 7}], agg#0=[sum($9)]) + HiveProject(c_first_name=[$0], c_last_name=[$1], ca_state=[$2], i_current_price=[$3], i_size=[$4], i_units=[$5], i_manager_id=[$6], s_store_name=[$7], s_state=[$8], $f9=[$9]) + HiveAggregate(group=[{4, 5, 8, 17, 18, 19, 20, 22, 23}], agg#0=[sum($15)]) + HiveJoin(condition=[AND(=($14, $1), =($11, $0))], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9]) HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveJoin(condition=[AND(=($1, $10), =($2, $19))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($22, $7), =($11, $19))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($10, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($1, $5), <>($4, $8))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14]) + HiveFilter(condition=[IS NOT NULL($4)]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ca_address_sk=[$0], ca_state=[$8], ca_zip=[$9], UPPER=[UPPER($10)]) + HiveFilter(condition=[IS NOT NULL($9)]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(ss_item_sk=[$0], ss_customer_sk=[$1], ss_store_sk=[$2], ss_ticket_number=[$3], ss_sales_price=[$4], i_item_sk=[$5], i_current_price=[$6], i_size=[$7], i_units=[$8], i_manager_id=[$9]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_sales_price=[$13]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($3))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(i_item_sk=[$0], i_current_price=[$5], i_size=[$15], i_units=[$18], i_manager_id=[$20]) + HiveFilter(condition=[=($17, _UTF-16LE'orchid')]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(s_store_sk=[$0], s_store_name=[$5], s_state=[$24], s_zip=[$25]) + HiveFilter(condition=[AND(=($10, 7), IS NOT NULL($25))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(_o__c0=[*(0.05, /($0, $1))]) + HiveAggregate(group=[{}], agg#0=[sum($10)], agg#1=[count($10)]) + HiveProject(c_first_name=[$0], c_last_name=[$1], ca_state=[$2], s_store_name=[$3], s_state=[$4], i_current_price=[$5], i_size=[$6], i_color=[$7], i_units=[$8], i_manager_id=[$9], $f10=[$10]) + HiveAggregate(group=[{9, 10, 13, 17, 18, 21, 22, 23, 24, 25}], agg#0=[sum($4)]) + HiveJoin(condition=[=($0, $20)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($1, $7), =($2, $16))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($3, $6), =($0, $5))], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_sales_price=[$13]) HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($3))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(i_item_sk=[$0], i_current_price=[$5], i_size=[$15], i_units=[$18], i_manager_id=[$20]) - HiveFilter(condition=[=($17, _UTF-16LE'orchid')]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$1], c_first_name=[$2], c_last_name=[$3], c_birth_country=[$4], ca_address_sk=[$5], ca_state=[$6], ca_zip=[$7], UPPER=[$8], s_store_sk=[$9], s_store_name=[$10], s_state=[$11], s_zip=[$12]) HiveJoin(condition=[AND(=($1, $5), <>($4, $8))], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14]) @@ -143,29 +167,6 @@ HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3]) HiveProject(s_store_sk=[$0], s_store_name=[$5], s_state=[$24], s_zip=[$25]) HiveFilter(condition=[AND(=($10, 7), IS NOT NULL($25))]) HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveProject(_o__c0=[*(0.05, /($0, $1))]) - HiveAggregate(group=[{}], agg#0=[sum($10)], agg#1=[count($10)]) - HiveProject(c_first_name=[$0], c_last_name=[$1], ca_state=[$2], s_store_name=[$3], s_state=[$4], i_current_price=[$5], i_size=[$6], i_color=[$7], i_units=[$8], i_manager_id=[$9], $f10=[$10]) - HiveAggregate(group=[{9, 10, 13, 17, 18, 21, 22, 23, 24, 25}], agg#0=[sum($6)]) - HiveJoin(condition=[AND(=($5, $1), =($2, $0))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9]) - HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveJoin(condition=[=($0, $18)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[AND(=($17, $12), =($2, $14))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_sales_price=[$13]) - HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($3))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveJoin(condition=[AND(=($1, $5), <>($4, $8))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4], c_first_name=[$8], c_last_name=[$9], c_birth_country=[$14]) - HiveFilter(condition=[IS NOT NULL($4)]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveProject(ca_address_sk=[$0], ca_state=[$8], ca_zip=[$9], UPPER=[UPPER($10)]) - HiveFilter(condition=[IS NOT NULL($9)]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveProject(s_store_sk=[$0], s_store_name=[$5], s_state=[$24], s_zip=[$25]) - HiveFilter(condition=[AND(=($10, 7), IS NOT NULL($25))]) - HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveProject(i_item_sk=[$0], i_current_price=[$5], i_size=[$15], i_color=[$17], i_units=[$18], i_manager_id=[$20]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_item_sk=[$0], i_current_price=[$5], i_size=[$15], i_color=[$17], i_units=[$18], i_manager_id=[$20]) + HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query25.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query25.q.out index a39931a22c..658410cf13 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query25.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query25.q.out @@ -109,28 +109,28 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], fetch=[100]) HiveProject(i_item_id=[$0], i_item_desc=[$1], s_store_id=[$2], s_store_name=[$3], $f4=[$4], $f5=[$5], $f6=[$6]) - HiveAggregate(group=[{1, 2, 22, 23}], agg#0=[sum($8)], agg#1=[sum($19)], agg#2=[sum($13)]) - HiveJoin(condition=[=($21, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveAggregate(group=[{6, 7, 22, 23}], agg#0=[sum($13)], agg#1=[sum($19)], agg#2=[sum($3)]) + HiveJoin(condition=[AND(=($17, $1), =($16, $2))], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[AND(AND(=($2, $14), =($1, $13)), =($4, $15))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_net_profit=[$22]) - HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0), IS NOT NULL($7))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($8, 4), =($6, 2000))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) - HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$1], cs_item_sk=[$2], cs_net_profit=[$3], d_date_sk=[$4], sr_returned_date_sk=[$5], sr_item_sk=[$6], sr_customer_sk=[$7], sr_ticket_number=[$8], sr_net_loss=[$9], d_date_sk0=[$10]) - HiveJoin(condition=[AND(=($7, $1), =($6, $2))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_item_sk=[$15], cs_net_profit=[$33]) - HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_item_sk=[$15], cs_net_profit=[$33]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(BETWEEN(false, $8, 4, 10), =($6, 2000))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$2], ss_sold_date_sk=[$3], ss_item_sk=[$4], ss_customer_sk=[$5], ss_store_sk=[$6], ss_ticket_number=[$7], ss_net_profit=[$8], d_date_sk=[$9], sr_returned_date_sk=[$10], sr_item_sk=[$11], sr_customer_sk=[$12], sr_ticket_number=[$13], sr_net_loss=[$14], d_date_sk0=[$15], s_store_sk=[$16], s_store_id=[$17], s_store_name=[$18]) + HiveJoin(condition=[=($16, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[AND(AND(=($2, $9), =($1, $8)), =($4, $10))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3], ss_store_sk=[$7], ss_ticket_number=[$9], ss_net_profit=[$22]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0), IS NOT NULL($7))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(BETWEEN(false, $8, 4, 10), =($6, 2000))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) + HiveFilter(condition=[AND(=($8, 4), =($6, 2000))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) HiveProject(sr_returned_date_sk=[$0], sr_item_sk=[$1], sr_customer_sk=[$2], sr_ticket_number=[$3], sr_net_loss=[$4], d_date_sk=[$5]) HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(sr_returned_date_sk=[$0], sr_item_sk=[$2], sr_customer_sk=[$3], sr_ticket_number=[$9], sr_net_loss=[$19]) @@ -139,6 +139,6 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ HiveProject(d_date_sk=[$0]) HiveFilter(condition=[AND(BETWEEN(false, $8, 4, 10), =($6, 2000))]) HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) - HiveProject(s_store_sk=[$0], s_store_id=[$1], s_store_name=[$5]) - HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(s_store_sk=[$0], s_store_id=[$1], s_store_name=[$5]) + HiveTableScan(table=[[default, store]], table:alias=[store]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query54.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query54.q.out index 1cf3ce4074..5d9dd17343 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query54.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query54.q.out @@ -1,7 +1,7 @@ -Warning: Shuffle Join MERGEJOIN[270][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product -Warning: Shuffle Join MERGEJOIN[271][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 5' is a cross product -Warning: Shuffle Join MERGEJOIN[269][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 32' is a cross product -Warning: Shuffle Join MERGEJOIN[272][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[264][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[273][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[272][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 14' is a cross product +Warning: Shuffle Join MERGEJOIN[274][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 6' is a cross product PREHOOK: query: explain cbo with my_customers as ( select distinct c_customer_sk @@ -139,56 +139,56 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveFilter(condition=[BETWEEN(false, $2, $3, $4)]) HiveProject(c_customer_sk=[$0], ss_ext_sales_price=[$4], d_month_seq=[$11], _o__c0=[$13], $f0=[$14]) HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject($f0=[$10], $f1=[$11], ss_sold_date_sk=[$0], ss_customer_sk=[$1], ss_ext_sales_price=[$2], ca_address_sk=[$5], ca_county=[$6], ca_state=[$7], s_county=[$8], s_state=[$9], d_date_sk=[$3], d_month_seq=[$4], cnt=[$12], $f00=[$13]) + HiveProject($f0=[$11], $f1=[$12], ss_sold_date_sk=[$0], ss_customer_sk=[$1], ss_ext_sales_price=[$2], ca_address_sk=[$6], ca_county=[$7], ca_state=[$8], s_county=[$9], s_state=[$10], d_date_sk=[$4], d_month_seq=[$5], cnt=[$3], $f00=[$13]) HiveJoin(condition=[true], joinType=[left], algorithm=[none], cost=[not available]) - HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($10, $1)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($11, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ss_sold_date_sk=[$0], ss_customer_sk=[$3], ss_ext_sales_price=[$15]) HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($0))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0], d_month_seq=[$3]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(ca_address_sk=[$0], ca_county=[$1], ca_state=[$2], s_county=[$3], s_state=[$4], c_customer_sk=[$5], c_current_addr_sk=[$6]) - HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[AND(=($1, $3), =($2, $4))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ca_address_sk=[$0], ca_county=[$7], ca_state=[$8]) - HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($8))]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveProject(s_county=[$23], s_state=[$24]) - HiveFilter(condition=[AND(IS NOT NULL($23), IS NOT NULL($24))]) - HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$1]) - HiveAggregate(group=[{0, 1}]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4]) - HiveFilter(condition=[IS NOT NULL($4)]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveJoin(condition=[=($2, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$1], cs_item_sk=[$2]) - HiveUnion(all=[true]) - HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_item_sk=[$15]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(sold_date_sk=[$0], customer_sk=[$4], item_sk=[$3]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[AND(=($8, 3), =($6, 1999))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(i_item_sk=[$0]) - HiveFilter(condition=[AND(=($12, _UTF-16LE'Jewelry'), =($10, _UTF-16LE'consignment'))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(cnt=[$0]) - HiveFilter(condition=[<=(sq_count_check($0), 1)]) HiveProject(cnt=[$0]) - HiveAggregate(group=[{}], cnt=[COUNT()]) - HiveProject($f0=[$0]) - HiveAggregate(group=[{0}]) - HiveProject($f0=[+($3, 1)]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveFilter(condition=[<=(sq_count_check($0), 1)]) + HiveProject(cnt=[$0]) + HiveAggregate(group=[{}], cnt=[COUNT()]) + HiveProject($f0=[$0]) + HiveAggregate(group=[{0}]) + HiveProject($f0=[+($3, 1)]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0], d_month_seq=[$3]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ca_address_sk=[$0], ca_county=[$1], ca_state=[$2], s_county=[$3], s_state=[$4], c_customer_sk=[$5], c_current_addr_sk=[$6]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($1, $3), =($2, $4))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_county=[$7], ca_state=[$8]) + HiveFilter(condition=[AND(IS NOT NULL($7), IS NOT NULL($8))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(s_county=[$23], s_state=[$24]) + HiveFilter(condition=[AND(IS NOT NULL($23), IS NOT NULL($24))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$1]) + HiveAggregate(group=[{0, 1}]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$4]) + HiveFilter(condition=[IS NOT NULL($4)]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveJoin(condition=[=($2, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$1], cs_item_sk=[$2]) + HiveUnion(all=[true]) + HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_item_sk=[$15]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($3))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(sold_date_sk=[$0], customer_sk=[$4], item_sk=[$3]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($4))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($8, 3), =($6, 1999))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Jewelry'), =($10, _UTF-16LE'consignment'))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject($f0=[$0]) HiveAggregate(group=[{0}]) HiveProject($f0=[+($3, 1)]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query64.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query64.q.out index 60c4325f41..103b67125c 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query64.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query64.q.out @@ -267,124 +267,124 @@ HiveProject(product_name=[$0], store_name=[$1], store_zip=[$2], b_street_number= HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$18], dir0=[ASC], dir1=[ASC], dir2=[ASC]) HiveProject(product_name=[$0], store_name=[$2], store_zip=[$3], b_street_number=[$4], b_streen_name=[$5], b_city=[$6], b_zip=[$7], c_street_number=[$8], c_street_name=[$9], c_city=[$10], c_zip=[$11], cnt=[$12], s1=[$13], s2=[$14], s3=[$15], s11=[$20], s21=[$21], s31=[$22], cnt1=[$19]) HiveJoin(condition=[AND(AND(AND(=($1, $16), <=($19, $12)), =($2, $17)), =($3, $18))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject($f0=[$13], $f1=[$12], $f2=[$10], $f3=[$11], $f4=[$6], $f5=[$7], $f6=[$8], $f7=[$9], $f8=[$0], $f9=[$1], $f10=[$2], $f11=[$3], $f15=[$14], $f16=[$15], $f17=[$16], $f18=[$17]) - HiveAggregate(group=[{9, 10, 11, 12, 15, 17, 23, 24, 25, 26, 28, 29, 42, 43}], agg#0=[count()], agg#1=[sum($39)], agg#2=[sum($40)], agg#3=[sum($41)]) - HiveJoin(condition=[AND(<>($1, $19), =($34, $0))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2]) - HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd1]) - HiveJoin(condition=[=($31, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $16)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($4, $14)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($5, $12)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $11)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($3, $6)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_hdemo_sk=[$3], c_current_addr_sk=[$4], c_first_shipto_date_sk=[$5], c_first_sales_date_sk=[$6]) - HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($5), IS NOT NULL($2), IS NOT NULL($3), IS NOT NULL($4))]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveProject(ca_address_sk=[$0], ca_street_number=[$2], ca_street_name=[$3], ca_city=[$6], ca_zip=[$9]) - HiveTableScan(table=[[default, customer_address]], table:alias=[ad2]) - HiveProject(hd_demo_sk=[$0]) - HiveFilter(condition=[IS NOT NULL($1)]) - HiveTableScan(table=[[default, household_demographics]], table:alias=[hd2]) - HiveProject(d_date_sk=[$0], d_year=[$6]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) - HiveProject(d_date_sk=[$0], d_year=[$6]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) - HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2]) - HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd2]) - HiveProject(sr_item_sk=[$0], sr_ticket_number=[$1], ca_address_sk=[$2], ca_street_number=[$3], ca_street_name=[$4], ca_city=[$5], ca_zip=[$6], s_store_sk=[$7], s_store_name=[$8], s_zip=[$9], hd_demo_sk=[$10], ss_sold_date_sk=[$11], ss_item_sk=[$12], ss_customer_sk=[$13], ss_cdemo_sk=[$14], ss_hdemo_sk=[$15], ss_addr_sk=[$16], ss_store_sk=[$17], ss_ticket_number=[$18], ss_wholesale_cost=[$19], ss_list_price=[$20], ss_coupon_amt=[$21], i_item_sk=[$22], i_product_name=[$23], d_date_sk=[$24], cs_item_sk=[$25]) - HiveJoin(condition=[AND(=($12, $0), =($18, $1))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9]) - HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveJoin(condition=[=($14, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ca_address_sk=[$0], ca_street_number=[$2], ca_street_name=[$3], ca_city=[$6], ca_zip=[$9]) - HiveTableScan(table=[[default, customer_address]], table:alias=[ad1]) - HiveJoin(condition=[=($10, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(s_store_sk=[$0], s_store_name=[$5], s_zip=[$25]) - HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($25))]) - HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(hd_demo_sk=[$0]) - HiveFilter(condition=[IS NOT NULL($1)]) - HiveTableScan(table=[[default, household_demographics]], table:alias=[hd1]) - HiveJoin(condition=[=($1, $14)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $13)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $11)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3], ss_cdemo_sk=[$4], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_ticket_number=[$9], ss_wholesale_cost=[$11], ss_list_price=[$12], ss_coupon_amt=[$19]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($3), IS NOT NULL($4), IS NOT NULL($8), IS NOT NULL($5), IS NOT NULL($6))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(i_item_sk=[$0], i_product_name=[$21]) - HiveFilter(condition=[AND(IN($17, _UTF-16LE'maroon', _UTF-16LE'burnished', _UTF-16LE'dim', _UTF-16LE'steel', _UTF-16LE'navajo', _UTF-16LE'chocolate'), BETWEEN(false, $5, 35, 45), BETWEEN(false, $5, 36, 50))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2000)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) - HiveProject(cs_item_sk=[$0]) - HiveFilter(condition=[>($1, *(2, $2))]) - HiveAggregate(group=[{0}], agg#0=[sum($2)], agg#1=[sum($5)]) - HiveJoin(condition=[AND(=($0, $3), =($1, $4))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_item_sk=[$15], cs_order_number=[$17], cs_ext_list_price=[$25]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(cr_item_sk=[$2], cr_order_number=[$16], +=[+(+($23, $24), $25)]) - HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) - HiveProject($f1=[$12], $f2=[$10], $f3=[$11], $f15=[$14], $f16=[$15], $f17=[$16], $f18=[$17]) - HiveAggregate(group=[{9, 10, 11, 12, 15, 17, 23, 24, 25, 26, 28, 29, 42, 43}], agg#0=[count()], agg#1=[sum($39)], agg#2=[sum($40)], agg#3=[sum($41)]) - HiveJoin(condition=[AND(<>($1, $19), =($34, $0))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2]) - HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd1]) - HiveJoin(condition=[=($31, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $16)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($4, $14)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($5, $12)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $11)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($3, $6)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_hdemo_sk=[$3], c_current_addr_sk=[$4], c_first_shipto_date_sk=[$5], c_first_sales_date_sk=[$6]) - HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($5), IS NOT NULL($2), IS NOT NULL($3), IS NOT NULL($4))]) - HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveProject(ca_address_sk=[$0], ca_street_number=[$2], ca_street_name=[$3], ca_city=[$6], ca_zip=[$9]) - HiveTableScan(table=[[default, customer_address]], table:alias=[ad2]) - HiveProject(hd_demo_sk=[$0]) - HiveFilter(condition=[IS NOT NULL($1)]) - HiveTableScan(table=[[default, household_demographics]], table:alias=[hd2]) - HiveProject(d_date_sk=[$0], d_year=[$6]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) - HiveProject(d_date_sk=[$0], d_year=[$6]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) - HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2]) - HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd2]) - HiveProject(sr_item_sk=[$0], sr_ticket_number=[$1], ca_address_sk=[$2], ca_street_number=[$3], ca_street_name=[$4], ca_city=[$5], ca_zip=[$6], s_store_sk=[$7], s_store_name=[$8], s_zip=[$9], hd_demo_sk=[$10], ss_sold_date_sk=[$11], ss_item_sk=[$12], ss_customer_sk=[$13], ss_cdemo_sk=[$14], ss_hdemo_sk=[$15], ss_addr_sk=[$16], ss_store_sk=[$17], ss_ticket_number=[$18], ss_wholesale_cost=[$19], ss_list_price=[$20], ss_coupon_amt=[$21], i_item_sk=[$22], i_product_name=[$23], d_date_sk=[$24], cs_item_sk=[$25]) - HiveJoin(condition=[AND(=($12, $0), =($18, $1))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9]) - HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveJoin(condition=[=($14, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ca_address_sk=[$0], ca_street_number=[$2], ca_street_name=[$3], ca_city=[$6], ca_zip=[$9]) - HiveTableScan(table=[[default, customer_address]], table:alias=[ad1]) - HiveJoin(condition=[=($10, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(s_store_sk=[$0], s_store_name=[$5], s_zip=[$25]) - HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($25))]) - HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(hd_demo_sk=[$0]) - HiveFilter(condition=[IS NOT NULL($1)]) - HiveTableScan(table=[[default, household_demographics]], table:alias=[hd1]) - HiveJoin(condition=[=($1, $14)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $13)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $11)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3], ss_cdemo_sk=[$4], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_ticket_number=[$9], ss_wholesale_cost=[$11], ss_list_price=[$12], ss_coupon_amt=[$19]) - HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($3), IS NOT NULL($4), IS NOT NULL($8), IS NOT NULL($5), IS NOT NULL($6))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(i_item_sk=[$0], i_product_name=[$21]) - HiveFilter(condition=[AND(IN($17, _UTF-16LE'maroon', _UTF-16LE'burnished', _UTF-16LE'dim', _UTF-16LE'steel', _UTF-16LE'navajo', _UTF-16LE'chocolate'), BETWEEN(false, $5, 35, 45), BETWEEN(false, $5, 36, 50))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[=($6, 2001)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) - HiveProject(cs_item_sk=[$0]) - HiveFilter(condition=[>($1, *(2, $2))]) - HiveAggregate(group=[{0}], agg#0=[sum($2)], agg#1=[sum($5)]) - HiveJoin(condition=[AND(=($0, $3), =($1, $4))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_item_sk=[$15], cs_order_number=[$17], cs_ext_list_price=[$25]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(cr_item_sk=[$2], cr_order_number=[$16], +=[+(+($23, $24), $25)]) - HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + HiveProject($f0=[$13], $f1=[$12], $f2=[$4], $f3=[$5], $f4=[$0], $f5=[$1], $f6=[$2], $f7=[$3], $f8=[$6], $f9=[$7], $f10=[$8], $f11=[$9], $f15=[$14], $f16=[$15], $f17=[$16], $f18=[$17]) + HiveAggregate(group=[{3, 4, 5, 6, 8, 9, 20, 21, 22, 23, 26, 28, 42, 43}], agg#0=[count()], agg#1=[sum($39)], agg#2=[sum($40)], agg#3=[sum($41)]) + HiveJoin(condition=[AND(=($32, $0), =($38, $1))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveJoin(condition=[=($34, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_street_number=[$2], ca_street_name=[$3], ca_city=[$6], ca_zip=[$9]) + HiveTableScan(table=[[default, customer_address]], table:alias=[ad1]) + HiveJoin(condition=[=($30, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(s_store_sk=[$0], s_store_name=[$5], s_zip=[$25]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($25))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveJoin(condition=[=($25, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(hd_demo_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($1)]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[hd1]) + HiveJoin(condition=[=($21, $34)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($3, $0), <>($19, $1))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd2]) + HiveJoin(condition=[=($20, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($4, $14)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $12)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_hdemo_sk=[$3], c_current_addr_sk=[$4], c_first_shipto_date_sk=[$5], c_first_sales_date_sk=[$6]) + HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($5), IS NOT NULL($2), IS NOT NULL($3), IS NOT NULL($4))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ca_address_sk=[$0], ca_street_number=[$2], ca_street_name=[$3], ca_city=[$6], ca_zip=[$9]) + HiveTableScan(table=[[default, customer_address]], table:alias=[ad2]) + HiveProject(hd_demo_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($1)]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[hd2]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) + HiveProject(cd_demo_sk=[$0], cd_marital_status=[$1], ss_sold_date_sk=[$2], ss_item_sk=[$3], ss_customer_sk=[$4], ss_cdemo_sk=[$5], ss_hdemo_sk=[$6], ss_addr_sk=[$7], ss_store_sk=[$8], ss_ticket_number=[$9], ss_wholesale_cost=[$10], ss_list_price=[$11], ss_coupon_amt=[$12], i_item_sk=[$13], i_product_name=[$14], d_date_sk=[$15]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd1]) + HiveJoin(condition=[=($0, $13)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3], ss_cdemo_sk=[$4], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_ticket_number=[$9], ss_wholesale_cost=[$11], ss_list_price=[$12], ss_coupon_amt=[$19]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($3), IS NOT NULL($4), IS NOT NULL($8), IS NOT NULL($5), IS NOT NULL($6))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(i_item_sk=[$0], i_product_name=[$21]) + HiveFilter(condition=[AND(IN($17, _UTF-16LE'maroon', _UTF-16LE'burnished', _UTF-16LE'dim', _UTF-16LE'steel', _UTF-16LE'navajo', _UTF-16LE'chocolate'), BETWEEN(false, $5, 35, 45), BETWEEN(false, $5, 36, 50))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2000)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + HiveProject(cs_item_sk=[$0]) + HiveFilter(condition=[>($1, *(2, $2))]) + HiveAggregate(group=[{0}], agg#0=[sum($2)], agg#1=[sum($5)]) + HiveJoin(condition=[AND(=($0, $3), =($1, $4))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_item_sk=[$15], cs_order_number=[$17], cs_ext_list_price=[$25]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(cr_item_sk=[$2], cr_order_number=[$16], +=[+(+($23, $24), $25)]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + HiveProject($f1=[$12], $f2=[$4], $f3=[$5], $f15=[$14], $f16=[$15], $f17=[$16], $f18=[$17]) + HiveAggregate(group=[{3, 4, 5, 6, 8, 9, 20, 21, 22, 23, 26, 28, 42, 43}], agg#0=[count()], agg#1=[sum($39)], agg#2=[sum($40)], agg#3=[sum($41)]) + HiveJoin(condition=[AND(=($32, $0), =($38, $1))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_item_sk=[$2], sr_ticket_number=[$9]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveJoin(condition=[=($34, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_street_number=[$2], ca_street_name=[$3], ca_city=[$6], ca_zip=[$9]) + HiveTableScan(table=[[default, customer_address]], table:alias=[ad1]) + HiveJoin(condition=[=($30, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(s_store_sk=[$0], s_store_name=[$5], s_zip=[$25]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($25))]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveJoin(condition=[=($25, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(hd_demo_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($1)]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[hd1]) + HiveJoin(condition=[=($21, $34)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($3, $0), <>($19, $1))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd2]) + HiveJoin(condition=[=($20, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($4, $14)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($5, $12)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_hdemo_sk=[$3], c_current_addr_sk=[$4], c_first_shipto_date_sk=[$5], c_first_sales_date_sk=[$6]) + HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($5), IS NOT NULL($2), IS NOT NULL($3), IS NOT NULL($4))]) + HiveTableScan(table=[[default, customer]], table:alias=[customer]) + HiveProject(ca_address_sk=[$0], ca_street_number=[$2], ca_street_name=[$3], ca_city=[$6], ca_zip=[$9]) + HiveTableScan(table=[[default, customer_address]], table:alias=[ad2]) + HiveProject(hd_demo_sk=[$0]) + HiveFilter(condition=[IS NOT NULL($1)]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[hd2]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) + HiveProject(d_date_sk=[$0], d_year=[$6]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) + HiveProject(cd_demo_sk=[$0], cd_marital_status=[$1], ss_sold_date_sk=[$2], ss_item_sk=[$3], ss_customer_sk=[$4], ss_cdemo_sk=[$5], ss_hdemo_sk=[$6], ss_addr_sk=[$7], ss_store_sk=[$8], ss_ticket_number=[$9], ss_wholesale_cost=[$10], ss_list_price=[$11], ss_coupon_amt=[$12], i_item_sk=[$13], i_product_name=[$14], d_date_sk=[$15]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[cd1]) + HiveJoin(condition=[=($0, $13)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3], ss_cdemo_sk=[$4], ss_hdemo_sk=[$5], ss_addr_sk=[$6], ss_store_sk=[$7], ss_ticket_number=[$9], ss_wholesale_cost=[$11], ss_list_price=[$12], ss_coupon_amt=[$19]) + HiveFilter(condition=[AND(IS NOT NULL($0), IS NOT NULL($7), IS NOT NULL($3), IS NOT NULL($4), IS NOT NULL($8), IS NOT NULL($5), IS NOT NULL($6))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(i_item_sk=[$0], i_product_name=[$21]) + HiveFilter(condition=[AND(IN($17, _UTF-16LE'maroon', _UTF-16LE'burnished', _UTF-16LE'dim', _UTF-16LE'steel', _UTF-16LE'navajo', _UTF-16LE'chocolate'), BETWEEN(false, $5, 35, 45), BETWEEN(false, $5, 36, 50))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[=($6, 2001)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + HiveProject(cs_item_sk=[$0]) + HiveFilter(condition=[>($1, *(2, $2))]) + HiveAggregate(group=[{0}], agg#0=[sum($2)], agg#1=[sum($5)]) + HiveJoin(condition=[AND(=($0, $3), =($1, $4))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_item_sk=[$15], cs_order_number=[$17], cs_ext_list_price=[$25]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(cr_item_sk=[$2], cr_order_number=[$16], +=[+(+($23, $24), $25)]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query72.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query72.q.out index 5e6950fb3e..c28c941213 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query72.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query72.q.out @@ -82,9 +82,11 @@ CBO PLAN: HiveSortLimit(sort0=[$5], sort1=[$0], sort2=[$1], sort3=[$2], dir0=[DESC-nulls-last], dir1=[ASC], dir2=[ASC], dir3=[ASC], fetch=[100]) HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3], $f4=[$4], $f5=[$5]) HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)], agg#1=[count($4)], agg#2=[count()]) - HiveProject($f0=[$15], $f1=[$13], $f2=[$19], $f3=[CASE(IS NULL($25), 1, 0)], $f4=[CASE(IS NOT NULL($25), 1, 0)]) - HiveJoin(condition=[AND(=($26, $4), =($27, $6))], joinType=[left], algorithm=[none], cost=[not available]) - HiveProject(cs_sold_date_sk=[$10], cs_ship_date_sk=[$11], cs_bill_cdemo_sk=[$12], cs_bill_hdemo_sk=[$13], cs_item_sk=[$14], cs_promo_sk=[$15], cs_order_number=[$16], cs_quantity=[$17], inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3], w_warehouse_sk=[$4], w_warehouse_name=[$5], i_item_sk=[$8], i_item_desc=[$9], cd_demo_sk=[$21], hd_demo_sk=[$22], d_date_sk=[$18], d_week_seq=[$19], +=[$20], d_date_sk0=[$24], d_week_seq0=[$25], d_date_sk1=[$6], CAST=[$7], p_promo_sk=[$23]) + HiveProject($f0=[$17], $f1=[$15], $f2=[$21], $f3=[CASE(IS NULL($27), 1, 0)], $f4=[CASE(IS NOT NULL($27), 1, 0)]) + HiveJoin(condition=[AND(=($0, $6), =($1, $8))], joinType=[right], algorithm=[none], cost=[not available]) + HiveProject(cr_item_sk=[$2], cr_order_number=[$16]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + HiveProject(cs_sold_date_sk=[$10], cs_ship_date_sk=[$11], cs_bill_cdemo_sk=[$12], cs_bill_hdemo_sk=[$13], cs_item_sk=[$14], cs_promo_sk=[$15], cs_order_number=[$16], cs_quantity=[$17], inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3], w_warehouse_sk=[$4], w_warehouse_name=[$5], i_item_sk=[$6], i_item_desc=[$7], cd_demo_sk=[$21], hd_demo_sk=[$22], d_date_sk=[$18], d_week_seq=[$19], +=[$20], d_date_sk0=[$24], d_week_seq0=[$25], d_date_sk1=[$8], CAST=[$9], p_promo_sk=[$23]) HiveJoin(condition=[AND(=($0, $24), =($19, $25))], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[AND(=($14, $1), <($3, $17))], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($4, $2)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -92,14 +94,14 @@ HiveSortLimit(sort0=[$5], sort1=[$0], sort2=[$1], sort3=[$2], dir0=[DESC-nulls-l HiveTableScan(table=[[default, inventory]], table:alias=[inventory]) HiveProject(w_warehouse_sk=[$0], w_warehouse_name=[$2]) HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) - HiveProject(d_date_sk=[$0], CAST=[$1], i_item_sk=[$2], i_item_desc=[$3], cs_sold_date_sk=[$4], cs_ship_date_sk=[$5], cs_bill_cdemo_sk=[$6], cs_bill_hdemo_sk=[$7], cs_item_sk=[$8], cs_promo_sk=[$9], cs_order_number=[$10], cs_quantity=[$11], d_date_sk0=[$12], d_week_seq=[$13], +=[$14], cd_demo_sk=[$15], hd_demo_sk=[$16], p_promo_sk=[$17]) - HiveJoin(condition=[AND(=($5, $0), >($1, $14))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date_sk=[$0], CAST=[CAST($2):DOUBLE]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) - HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(i_item_sk=[$0], i_item_desc=[$4]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveJoin(condition=[=($5, $13)], joinType=[left], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_desc=[$1], d_date_sk=[$2], CAST=[$3], cs_sold_date_sk=[$4], cs_ship_date_sk=[$5], cs_bill_cdemo_sk=[$6], cs_bill_hdemo_sk=[$7], cs_item_sk=[$8], cs_promo_sk=[$9], cs_order_number=[$10], cs_quantity=[$11], d_date_sk0=[$12], d_week_seq=[$13], +=[$14], cd_demo_sk=[$15], hd_demo_sk=[$16], p_promo_sk=[$17]) + HiveJoin(condition=[=($0, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_desc=[$4]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveJoin(condition=[=($7, $15)], joinType=[left], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($3, $0), >($1, $12))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date_sk=[$0], CAST=[CAST($2):DOUBLE]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d3]) HiveJoin(condition=[=($3, $12)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($2, $11)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($0, $8)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -115,11 +117,9 @@ HiveSortLimit(sort0=[$5], sort1=[$0], sort2=[$1], sort3=[$2], dir0=[DESC-nulls-l HiveProject(hd_demo_sk=[$0]) HiveFilter(condition=[=($2, _UTF-16LE'1001-5000')]) HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) - HiveProject(p_promo_sk=[$0]) - HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) + HiveProject(p_promo_sk=[$0]) + HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) HiveProject(d_date_sk=[$0], d_week_seq=[$4]) HiveFilter(condition=[IS NOT NULL($4)]) HiveTableScan(table=[[default, date_dim]], table:alias=[d2]) - HiveProject(cr_item_sk=[$2], cr_order_number=[$16]) - HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query84.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query84.q.out index 43ea953562..d5974e2683 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query84.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query84.q.out @@ -56,15 +56,15 @@ CBO PLAN: HiveProject(customer_id=[$0], customername=[$1]) HiveSortLimit(sort0=[$2], dir0=[ASC], fetch=[100]) HiveProject(customer_id=[$2], customername=[$6], c_customer_id=[$2]) - HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(sr_cdemo_sk=[$4]) - HiveFilter(condition=[IS NOT NULL($4)]) - HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveProject(cd_demo_sk=[$0]) - HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) - HiveProject(c_customer_id=[$0], c_current_cdemo_sk=[$1], c_current_hdemo_sk=[$2], c_current_addr_sk=[$3], ||=[$4], ca_address_sk=[$5], hd_demo_sk=[$6], hd_income_band_sk=[$7], ib_income_band_sk=[$8]) - HiveJoin(condition=[=($6, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($8, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_cdemo_sk=[$4]) + HiveFilter(condition=[IS NOT NULL($4)]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveProject(cd_demo_sk=[$0]) + HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics]) + HiveProject(c_customer_id=[$0], c_current_cdemo_sk=[$1], c_current_hdemo_sk=[$2], c_current_addr_sk=[$3], ||=[$4], ca_address_sk=[$5]) HiveJoin(condition=[=($3, $5)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_id=[$1], c_current_cdemo_sk=[$2], c_current_hdemo_sk=[$3], c_current_addr_sk=[$4], ||=[||(||($9, _UTF-16LE', '), $8)]) HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2), IS NOT NULL($3))]) @@ -72,12 +72,12 @@ HiveProject(customer_id=[$0], customername=[$1]) HiveProject(ca_address_sk=[$0]) HiveFilter(condition=[=($6, _UTF-16LE'Hopewell')]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveProject(hd_demo_sk=[$0], hd_income_band_sk=[$1], ib_income_band_sk=[$2]) - HiveJoin(condition=[=($2, $1)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(hd_demo_sk=[$0], hd_income_band_sk=[$1]) - HiveFilter(condition=[IS NOT NULL($1)]) - HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) - HiveProject(ib_income_band_sk=[$0]) - HiveFilter(condition=[AND(>=($1, 32287), <=($2, 82287))]) - HiveTableScan(table=[[default, income_band]], table:alias=[income_band]) + HiveProject(hd_demo_sk=[$0], hd_income_band_sk=[$1], ib_income_band_sk=[$2]) + HiveJoin(condition=[=($2, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(hd_demo_sk=[$0], hd_income_band_sk=[$1]) + HiveFilter(condition=[IS NOT NULL($1)]) + HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics]) + HiveProject(ib_income_band_sk=[$0]) + HiveFilter(condition=[AND(>=($1, 32287), <=($2, 82287))]) + HiveTableScan(table=[[default, income_band]], table:alias=[income_band]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query17.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query17.q.out index e796101e45..84eda72da7 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query17.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query17.q.out @@ -103,217 +103,225 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 12 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) -Map 19 <- Reducer 14 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE) +Map 1 <- Reducer 9 (BROADCAST_EDGE) +Map 19 <- Reducer 14 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE), Reducer 18 (BROADCAST_EDGE), Reducer 6 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE) Reducer 10 <- Map 19 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) -Reducer 12 <- Reducer 10 (CUSTOM_SIMPLE_EDGE) -Reducer 13 <- Reducer 10 (CUSTOM_SIMPLE_EDGE) +Reducer 12 <- Map 21 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 13 <- Map 22 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) Reducer 14 <- Map 8 (CUSTOM_SIMPLE_EDGE) Reducer 15 <- Map 20 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Reducer 16 <- Reducer 15 (CUSTOM_SIMPLE_EDGE) Reducer 17 <- Reducer 15 (CUSTOM_SIMPLE_EDGE) +Reducer 18 <- Reducer 15 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 3 <- Map 18 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Reducer 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 21 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 3 <- Reducer 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 7 vectorized - File Output Operator [FS_259] - Limit [LIM_258] (rows=100 width=466) + Reducer 5 vectorized + File Output Operator [FS_252] + Limit [LIM_251] (rows=100 width=466) Number of rows:100 - Select Operator [SEL_257] (rows=4815969644 width=466) + Select Operator [SEL_250] (rows=4815969566 width=466) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] - <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_256] - Select Operator [SEL_255] (rows=4815969644 width=466) + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_249] + Select Operator [SEL_248] (rows=4815969566 width=466) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - Group By Operator [GBY_254] (rows=4815969644 width=466) + Group By Operator [GBY_247] (rows=4815969566 width=466) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","count(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)","sum(VALUE._col7)","count(VALUE._col8)","sum(VALUE._col9)","sum(VALUE._col10)","sum(VALUE._col11)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 5 [SIMPLE_EDGE] + <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_48] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_47] (rows=4815969644 width=466) + Group By Operator [GBY_47] (rows=4815969566 width=466) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["count(_col3)","sum(_col3)","sum(_col7)","sum(_col6)","count(_col4)","sum(_col4)","sum(_col9)","sum(_col8)","count(_col5)","sum(_col5)","sum(_col11)","sum(_col10)"],keys:_col0, _col1, _col2 - Top N Key Operator [TNK_91] (rows=4815969644 width=381) + Top N Key Operator [TNK_92] (rows=4815969566 width=381) keys:_col0, _col1, _col2,sort order:+++,top n:100 - Select Operator [SEL_45] (rows=4815969644 width=381) + Select Operator [SEL_45] (rows=4815969566 width=381) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - Merge Join Operator [MERGEJOIN_211] (rows=4815969644 width=381) - Conds:RS_42._col3=RS_253._col0(Inner),Output:["_col5","_col8","_col9","_col13","_col19","_col22"] - <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_253] - PartitionCols:_col0 - Select Operator [SEL_252] (rows=1704 width=90) - Output:["_col0","_col1"] - TableScan [TS_31] (rows=1704 width=90) - default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_state"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_42] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_210] (rows=4815969644 width=299) - Conds:RS_39._col1, _col2, _col4=RS_40._col6, _col7, _col8(Inner),Output:["_col3","_col5","_col8","_col9","_col13","_col19"] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_40] - PartitionCols:_col6, _col7, _col8 - Merge Join Operator [MERGEJOIN_209] (rows=540026342 width=19) - Conds:RS_27._col2, _col1=RS_28._col1, _col2(Inner),Output:["_col3","_col6","_col7","_col8","_col9"] - <-Reducer 10 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_27] - PartitionCols:_col2, _col1 - Merge Join Operator [MERGEJOIN_207] (rows=14254135 width=11) - Conds:RS_242._col0=RS_220._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_220] - PartitionCols:_col0 - Select Operator [SEL_216] (rows=3652 width=4) - Output:["_col0"] - Filter Operator [FIL_213] (rows=3652 width=94) - predicate:(d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') - TableScan [TS_3] (rows=73049 width=94) - default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_quarter_name"] - <-Map 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_242] - PartitionCols:_col0 - Select Operator [SEL_241] (rows=285117831 width=15) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_240] (rows=285117831 width=15) - predicate:((cs_bill_customer_sk BETWEEN DynamicValue(RS_28_store_returns_sr_customer_sk_min) AND DynamicValue(RS_28_store_returns_sr_customer_sk_max) and in_bloom_filter(cs_bill_customer_sk, DynamicValue(RS_28_store_returns_sr_customer_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_28_store_returns_sr_item_sk_min) AND DynamicValue(RS_28_store_returns_sr_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_28_store_returns_sr_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_25_d3_d_date_sk_min) AND DynamicValue(RS_25_d3_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_25_d3_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_8] (rows=287989836 width=15) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_quantity"] - <-Reducer 16 [BROADCAST_EDGE] vectorized - BROADCAST [RS_234] - Group By Operator [GBY_232] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 15 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_109] - Group By Operator [GBY_108] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_107] (rows=2681277 width=8) - Output:["_col0"] - Merge Join Operator [MERGEJOIN_208] (rows=2681277 width=10) - Conds:RS_231._col0=RS_222._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_222] - PartitionCols:_col0 - Select Operator [SEL_217] (rows=3652 width=4) + Merge Join Operator [MERGEJOIN_204] (rows=4815969566 width=381) + Conds:RS_42._col2, _col1=RS_43._col11, _col12(Inner),Output:["_col3","_col6","_col7","_col13","_col19","_col22"] + <-Reducer 2 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_42] + PartitionCols:_col2, _col1 + Merge Join Operator [MERGEJOIN_198] (rows=14254135 width=11) + Conds:RS_224._col0=RS_211._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_211] + PartitionCols:_col0 + Select Operator [SEL_208] (rows=3652 width=4) + Output:["_col0"] + Filter Operator [FIL_205] (rows=3652 width=94) + predicate:(d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') + TableScan [TS_3] (rows=73049 width=94) + default@date_dim,d3,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_quarter_name"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_224] + PartitionCols:_col0 + Select Operator [SEL_223] (rows=285117831 width=15) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_222] (rows=285117831 width=15) + predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_40_d3_d_date_sk_min) AND DynamicValue(RS_40_d3_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_40_d3_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_0] (rows=287989836 width=15) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_quantity"] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_221] + Group By Operator [GBY_220] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_218] + Group By Operator [GBY_216] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_212] (rows=3652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_208] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_43] + PartitionCols:_col11, _col12 + Select Operator [SEL_38] (rows=23911701 width=381) + Output:["_col1","_col2","_col8","_col11","_col12","_col14","_col17"] + Merge Join Operator [MERGEJOIN_203] (rows=23911701 width=381) + Conds:RS_35._col1=RS_246._col0(Inner),Output:["_col5","_col8","_col9","_col11","_col14","_col16","_col17"] + <-Map 22 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_246] + PartitionCols:_col0 + Select Operator [SEL_245] (rows=462000 width=288) + Output:["_col0","_col1","_col2"] + TableScan [TS_24] (rows=462000 width=288) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_item_desc"] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_35] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_202] (rows=23911701 width=101) + Conds:RS_32._col3=RS_244._col0(Inner),Output:["_col1","_col5","_col8","_col9","_col11","_col14"] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_244] + PartitionCols:_col0 + Select Operator [SEL_243] (rows=1704 width=90) + Output:["_col0","_col1"] + TableScan [TS_22] (rows=1704 width=90) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_state"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_32] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_201] (rows=23911701 width=15) + Conds:RS_29._col1, _col2, _col4=RS_30._col1, _col2, _col3(Inner),Output:["_col1","_col3","_col5","_col8","_col9","_col11"] + <-Reducer 15 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_30] + PartitionCols:_col1, _col2, _col3 + Merge Join Operator [MERGEJOIN_200] (rows=2681277 width=10) + Conds:RS_229._col0=RS_215._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_215] + PartitionCols:_col0 + Select Operator [SEL_210] (rows=3652 width=4) + Output:["_col0"] + Filter Operator [FIL_207] (rows=3652 width=94) + predicate:(d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') + Please refer to the previous TableScan [TS_3] + <-Map 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_229] + PartitionCols:_col0 + Select Operator [SEL_228] (rows=53632139 width=19) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_227] (rows=53632139 width=19) + predicate:(sr_customer_sk is not null and sr_returned_date_sk is not null) + TableScan [TS_12] (rows=57591150 width=19) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number","sr_return_quantity"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_29] + PartitionCols:_col1, _col2, _col4 + Merge Join Operator [MERGEJOIN_199] (rows=27749405 width=10) + Conds:RS_242._col0=RS_213._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_213] + PartitionCols:_col0 + Select Operator [SEL_209] (rows=101 width=4) + Output:["_col0"] + Filter Operator [FIL_206] (rows=101 width=94) + predicate:(d_quarter_name = '2000Q1') + Please refer to the previous TableScan [TS_3] + <-Map 19 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_242] + PartitionCols:_col0 + Select Operator [SEL_241] (rows=501694138 width=23) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_240] (rows=501694138 width=23) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_30_store_returns_sr_customer_sk_min) AND DynamicValue(RS_30_store_returns_sr_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_30_store_returns_sr_customer_sk_bloom_filter))) and (ss_customer_sk BETWEEN DynamicValue(RS_42_catalog_sales_cs_bill_customer_sk_min) AND DynamicValue(RS_42_catalog_sales_cs_bill_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_42_catalog_sales_cs_bill_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_30_store_returns_sr_item_sk_min) AND DynamicValue(RS_30_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_30_store_returns_sr_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_42_catalog_sales_cs_item_sk_min) AND DynamicValue(RS_42_catalog_sales_cs_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_42_catalog_sales_cs_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_27_d1_d_date_sk_min) AND DynamicValue(RS_27_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_27_d1_d_date_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_30_store_returns_sr_ticket_number_min) AND DynamicValue(RS_30_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_30_store_returns_sr_ticket_number_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_6] (rows=575995635 width=23) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_quantity"] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_226] + Group By Operator [GBY_225] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_219] + Group By Operator [GBY_217] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_214] (rows=101 width=4) Output:["_col0"] - Filter Operator [FIL_214] (rows=3652 width=94) - predicate:(d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') - Please refer to the previous TableScan [TS_3] - <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_231] - PartitionCols:_col0 - Select Operator [SEL_230] (rows=53632139 width=19) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_229] (rows=53632139 width=19) - predicate:(sr_customer_sk is not null and sr_returned_date_sk is not null) - TableScan [TS_14] (rows=57591150 width=19) - default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number","sr_return_quantity"] - <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_239] - Group By Operator [GBY_237] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 15 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_124] - Group By Operator [GBY_123] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_122] (rows=2681277 width=2) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_208] - <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_236] - Group By Operator [GBY_235] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_226] - Group By Operator [GBY_224] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_221] (rows=3652 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_216] - <-Reducer 15 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_28] - PartitionCols:_col1, _col2 - Please refer to the previous Merge Join Operator [MERGEJOIN_208] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_39] - PartitionCols:_col1, _col2, _col4 - Merge Join Operator [MERGEJOIN_206] (rows=27749405 width=294) - Conds:RS_36._col1=RS_251._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col8","_col9"] - <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_251] - PartitionCols:_col0 - Select Operator [SEL_250] (rows=462000 width=288) - Output:["_col0","_col1","_col2"] - TableScan [TS_6] (rows=462000 width=288) - default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_item_desc"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_36] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_205] (rows=27749405 width=10) - Conds:RS_249._col0=RS_218._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_218] - PartitionCols:_col0 - Select Operator [SEL_215] (rows=101 width=4) - Output:["_col0"] - Filter Operator [FIL_212] (rows=101 width=94) - predicate:(d_quarter_name = '2000Q1') - Please refer to the previous TableScan [TS_3] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_249] - PartitionCols:_col0 - Select Operator [SEL_248] (rows=501694138 width=23) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_247] (rows=501694138 width=23) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_27_catalog_sales_cs_bill_customer_sk_min) AND DynamicValue(RS_27_catalog_sales_cs_bill_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_27_catalog_sales_cs_bill_customer_sk_bloom_filter))) and (ss_customer_sk BETWEEN DynamicValue(RS_28_store_returns_sr_customer_sk_min) AND DynamicValue(RS_28_store_returns_sr_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_28_store_returns_sr_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_27_catalog_sales_cs_item_sk_min) AND DynamicValue(RS_27_catalog_sales_cs_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_27_catalog_sales_cs_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_28_store_returns_sr_item_sk_min) AND DynamicValue(RS_28_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_28_store_returns_sr_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_34_d1_d_date_sk_min) AND DynamicValue(RS_34_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_34_d1_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_0] (rows=575995635 width=23) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_quantity"] - <-Reducer 16 [BROADCAST_EDGE] vectorized - BROADCAST [RS_233] - Please refer to the previous Group By Operator [GBY_232] - <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_238] - Please refer to the previous Group By Operator [GBY_237] - <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_244] - Group By Operator [GBY_243] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 10 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_114] - Group By Operator [GBY_113] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_112] (rows=14254135 width=8) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_207] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_246] - Group By Operator [GBY_245] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 10 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_129] - Group By Operator [GBY_128] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_127] (rows=14254135 width=7) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_207] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_228] - Group By Operator [GBY_227] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_225] - Group By Operator [GBY_223] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_219] (rows=101 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_215] + Please refer to the previous Select Operator [SEL_209] + <-Reducer 16 [BROADCAST_EDGE] vectorized + BROADCAST [RS_231] + Group By Operator [GBY_230] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 15 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_120] + Group By Operator [GBY_119] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_118] (rows=2681277 width=8) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_200] + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_233] + Group By Operator [GBY_232] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 15 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_125] + Group By Operator [GBY_124] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_123] (rows=2681277 width=2) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_200] + <-Reducer 18 [BROADCAST_EDGE] vectorized + BROADCAST [RS_235] + Group By Operator [GBY_234] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1746340)"] + <-Reducer 15 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_130] + Group By Operator [GBY_129] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1746340)"] + Select Operator [SEL_128] (rows=2681277 width=8) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_200] + <-Reducer 6 [BROADCAST_EDGE] vectorized + BROADCAST [RS_237] + Group By Operator [GBY_236] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_145] + Group By Operator [GBY_144] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_143] (rows=14254135 width=8) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_198] + <-Reducer 7 [BROADCAST_EDGE] vectorized + BROADCAST [RS_239] + Group By Operator [GBY_238] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_150] + Group By Operator [GBY_149] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_148] (rows=14254135 width=7) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_198] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query24.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query24.q.out index fb77386d6e..e12be717df 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query24.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query24.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[298][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[298][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 7' is a cross product PREHOOK: query: explain with ssales as (select c_last_name @@ -116,38 +116,38 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 16 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) -Map 25 <- Reducer 22 (BROADCAST_EDGE) -Reducer 10 <- Map 24 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Map 17 <- Reducer 20 (BROADCAST_EDGE), Reducer 22 (BROADCAST_EDGE) +Map 25 <- Reducer 13 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE) +Reducer 10 <- Map 19 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) Reducer 11 <- Reducer 10 (SIMPLE_EDGE) Reducer 12 <- Reducer 11 (CUSTOM_SIMPLE_EDGE) -Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) -Reducer 15 <- Map 23 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) -Reducer 16 <- Reducer 15 (CUSTOM_SIMPLE_EDGE) -Reducer 17 <- Reducer 15 (CUSTOM_SIMPLE_EDGE) -Reducer 18 <- Map 13 (SIMPLE_EDGE), Map 23 (SIMPLE_EDGE) -Reducer 19 <- Map 25 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 20 <- Map 21 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Reducer 13 <- Reducer 8 (CUSTOM_SIMPLE_EDGE) +Reducer 14 <- Reducer 8 (CUSTOM_SIMPLE_EDGE) +Reducer 16 <- Map 15 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) +Reducer 18 <- Map 17 (SIMPLE_EDGE), Map 19 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) +Reducer 20 <- Map 19 (CUSTOM_SIMPLE_EDGE) Reducer 22 <- Map 21 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Reducer 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 24 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 12 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) -Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Map 7 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 24 <- Map 23 (SIMPLE_EDGE), Map 25 (SIMPLE_EDGE) +Reducer 3 <- Reducer 18 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 21 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 23 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 12 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Map 1 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) +Reducer 9 <- Reducer 24 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 6 - File Output Operator [FS_91] - Select Operator [SEL_90] (rows=1313165 width=380) + Reducer 7 + File Output Operator [FS_92] + Select Operator [SEL_91] (rows=37400 width=380) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_89] (rows=1313165 width=492) + Filter Operator [FIL_90] (rows=37400 width=492) predicate:(_col3 > _col4) - Merge Join Operator [MERGEJOIN_298] (rows=3939496 width=492) + Merge Join Operator [MERGEJOIN_298] (rows=112200 width=492) Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_350] @@ -159,205 +159,205 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_347] Group By Operator [GBY_346] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col10)","count(_col10)"] - Select Operator [SEL_345] (rows=8029453 width=932) + Select Operator [SEL_345] (rows=589731268 width=932) Output:["_col10"] - Group By Operator [GBY_344] (rows=8029453 width=932) + Group By Operator [GBY_344] (rows=589731268 width=932) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8, KEY._col9 <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_78] + SHUFFLE [RS_79] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Group By Operator [GBY_77] (rows=8029453 width=932) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(_col13)"],keys:_col2, _col3, _col6, _col15, _col16, _col19, _col20, _col21, _col22, _col23 - Merge Join Operator [MERGEJOIN_297] (rows=13238221 width=865) - Conds:RS_73._col9, _col12=RS_333._col0, _col1(Inner),Output:["_col2","_col3","_col6","_col13","_col15","_col16","_col19","_col20","_col21","_col22","_col23"] - <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_333] - PartitionCols:_col0, _col1 - Select Operator [SEL_331] (rows=57591150 width=8) - Output:["_col0","_col1"] - TableScan [TS_23] (rows=57591150 width=8) - default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_ticket_number"] + Group By Operator [GBY_78] (rows=589731268 width=932) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(_col4)"],keys:_col9, _col10, _col13, _col17, _col18, _col21, _col22, _col23, _col24, _col25 + Merge Join Operator [MERGEJOIN_297] (rows=589731268 width=928) + Conds:RS_74._col0=RS_310._col0(Inner),Output:["_col4","_col9","_col10","_col13","_col17","_col18","_col21","_col22","_col23","_col24","_col25"] + <-Map 19 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_310] + PartitionCols:_col0 + Select Operator [SEL_308] (rows=462000 width=384) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + TableScan [TS_9] (rows=462000 width=384) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_current_price","i_size","i_color","i_units","i_manager_id"] <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_73] - PartitionCols:_col9, _col12 - Merge Join Operator [MERGEJOIN_296] (rows=8029453 width=828) - Conds:RS_70._col9=RS_302._col0(Inner),Output:["_col2","_col3","_col6","_col9","_col12","_col13","_col15","_col16","_col19","_col20","_col21","_col22","_col23"] - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_302] - PartitionCols:_col0 - Select Operator [SEL_300] (rows=462000 width=384) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - TableScan [TS_3] (rows=462000 width=384) - default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_current_price","i_size","i_color","i_units","i_manager_id"] - <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_70] - PartitionCols:_col9 - Merge Join Operator [MERGEJOIN_295] (rows=8029453 width=448) - Conds:RS_67._col7, _col11=RS_316._col3, _col0(Inner),Output:["_col2","_col3","_col6","_col9","_col12","_col13","_col15","_col16"] - <-Map 21 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_316] - PartitionCols:_col3, _col0 - Select Operator [SEL_314] (rows=155 width=267) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_313] (rows=155 width=271) - predicate:((s_market_id = 7) and s_zip is not null) - TableScan [TS_9] (rows=1704 width=270) - default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_market_id","s_state","s_zip"] - <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_67] - PartitionCols:_col7, _col11 - Merge Join Operator [MERGEJOIN_294] (rows=525333486 width=473) - Conds:RS_64._col0=RS_343._col1(Inner),Output:["_col2","_col3","_col6","_col7","_col9","_col11","_col12","_col13"] - <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_343] + SHUFFLE [RS_74] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_296] (rows=589731268 width=551) + Conds:RS_71._col2, _col1=RS_72._col9, _col0(Inner),Output:["_col0","_col4","_col9","_col10","_col13","_col17","_col18"] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_72] + PartitionCols:_col9, _col0 + Select Operator [SEL_65] (rows=7276996 width=724) + Output:["_col0","_col2","_col3","_col6","_col9","_col10","_col11"] + Filter Operator [FIL_64] (rows=7276996 width=724) + predicate:(_col12 <> _col3) + Merge Join Operator [MERGEJOIN_295] (rows=7276996 width=724) + Conds:RS_61._col0=RS_302._col1(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col8","_col10","_col11","_col12"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_302] PartitionCols:_col1 - Select Operator [SEL_342] (rows=525333486 width=122) + Select Operator [SEL_300] (rows=80000000 width=280) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_341] (rows=525333486 width=122) - predicate:((ss_store_sk BETWEEN DynamicValue(RS_68_store_s_store_sk_min) AND DynamicValue(RS_68_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_68_store_s_store_sk_bloom_filter))) and ss_customer_sk is not null and ss_store_sk is not null) - TableScan [TS_50] (rows=575995635 width=122) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_sales_price"] - <-Reducer 22 [BROADCAST_EDGE] vectorized - BROADCAST [RS_340] - Group By Operator [GBY_339] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_319] - Group By Operator [GBY_318] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_317] (rows=155 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_314] - <-Reducer 18 [SIMPLE_EDGE] - SHUFFLE [RS_64] + Filter Operator [FIL_299] (rows=80000000 width=280) + predicate:c_current_addr_sk is not null + TableScan [TS_0] (rows=80000000 width=280) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk","c_first_name","c_last_name","c_birth_country"] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_61] PartitionCols:_col0 - Filter Operator [FIL_63] (rows=80000000 width=635) - predicate:(_col4 <> _col8) - Merge Join Operator [MERGEJOIN_293] (rows=80000000 width=635) - Conds:RS_323._col1=RS_312._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col6","_col7","_col8"] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_312] - PartitionCols:_col0 - Select Operator [SEL_310] (rows=40000000 width=363) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_309] (rows=40000000 width=276) - predicate:ca_zip is not null - TableScan [TS_6] (rows=40000000 width=276) - default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_zip","ca_country"] - <-Map 23 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_323] - PartitionCols:_col1 - Select Operator [SEL_321] (rows=80000000 width=280) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_320] (rows=80000000 width=280) - predicate:c_current_addr_sk is not null - TableScan [TS_12] (rows=80000000 width=280) - default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk","c_first_name","c_last_name","c_birth_country"] - <-Reducer 5 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_338] - Select Operator [SEL_337] (rows=3939496 width=380) + Merge Join Operator [MERGEJOIN_294] (rows=611379 width=452) + Conds:RS_306._col2=RS_321._col3(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6"] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_321] + PartitionCols:_col3 + Select Operator [SEL_318] (rows=155 width=267) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_317] (rows=155 width=271) + predicate:((s_market_id = 7) and s_zip is not null) + TableScan [TS_16] (rows=1704 width=270) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_market_id","s_state","s_zip"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_306] + PartitionCols:_col2 + Select Operator [SEL_304] (rows=40000000 width=363) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_303] (rows=40000000 width=276) + predicate:ca_zip is not null + TableScan [TS_3] (rows=40000000 width=276) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_zip","ca_country"] + <-Reducer 24 [SIMPLE_EDGE] + SHUFFLE [RS_71] + PartitionCols:_col2, _col1 + Merge Join Operator [MERGEJOIN_293] (rows=537799798 width=118) + Conds:RS_343._col0, _col3=RS_331._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col4"] + <-Map 23 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_331] + PartitionCols:_col0, _col1 + Select Operator [SEL_329] (rows=57591150 width=8) + Output:["_col0","_col1"] + TableScan [TS_19] (rows=57591150 width=8) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_ticket_number"] + <-Map 25 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_343] + PartitionCols:_col0, _col3 + Select Operator [SEL_342] (rows=525333486 width=122) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_341] (rows=525333486 width=122) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_72_customer_c_customer_sk_min) AND DynamicValue(RS_72_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_72_customer_c_customer_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_72_store_s_store_sk_min) AND DynamicValue(RS_72_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_72_store_s_store_sk_bloom_filter))) and ss_customer_sk is not null and ss_store_sk is not null) + TableScan [TS_44] (rows=575995635 width=122) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_sales_price"] + <-Reducer 13 [BROADCAST_EDGE] vectorized + BROADCAST [RS_338] + Group By Operator [GBY_337] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 8 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_227] + Group By Operator [GBY_226] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_225] (rows=7276996 width=8) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_65] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_340] + Group By Operator [GBY_339] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=6636187)"] + <-Reducer 8 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_232] + Group By Operator [GBY_231] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=6636187)"] + Select Operator [SEL_230] (rows=7276996 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_65] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_336] + Select Operator [SEL_335] (rows=112200 width=380) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_336] (rows=3939496 width=380) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col9)"],keys:_col4, _col5, _col7 - Select Operator [SEL_335] (rows=84010488 width=843) - Output:["_col4","_col5","_col7","_col9"] - Group By Operator [GBY_334] (rows=84010488 width=843) + Group By Operator [GBY_334] (rows=112200 width=380) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col9)"],keys:_col0, _col1, _col7 + Select Operator [SEL_333] (rows=1222708 width=843) + Output:["_col0","_col1","_col7","_col9"] + Group By Operator [GBY_332] (rows=1222708 width=843) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8 - <-Reducer 4 [SIMPLE_EDGE] + <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_36] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_35] (rows=84010488 width=843) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(_col4)"],keys:_col12, _col13, _col20, _col6, _col7, _col8, _col9, _col16, _col21 - Merge Join Operator [MERGEJOIN_292] (rows=138508741 width=824) - Conds:RS_31._col0, _col3=RS_332._col0, _col1(Inner),Output:["_col4","_col6","_col7","_col8","_col9","_col12","_col13","_col16","_col20","_col21"] - <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_332] + Group By Operator [GBY_35] (rows=1222708 width=843) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(_col13)"],keys:_col2, _col3, _col20, _col6, _col15, _col16, _col17, _col18, _col21 + Merge Join Operator [MERGEJOIN_292] (rows=2015888 width=776) + Conds:RS_31._col9, _col12=RS_330._col0, _col1(Inner),Output:["_col2","_col3","_col6","_col13","_col15","_col16","_col17","_col18","_col20","_col21"] + <-Map 23 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_330] PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_331] - <-Reducer 3 [SIMPLE_EDGE] + Please refer to the previous Select Operator [SEL_329] + <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_31] - PartitionCols:_col0, _col3 - Merge Join Operator [MERGEJOIN_291] (rows=84010488 width=820) - Conds:RS_28._col1, _col2=RS_29._col0, _col9(Inner),Output:["_col0","_col3","_col4","_col6","_col7","_col8","_col9","_col12","_col13","_col16","_col20","_col21"] - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_29] - PartitionCols:_col0, _col9 - Select Operator [SEL_22] (rows=7276996 width=724) - Output:["_col0","_col2","_col3","_col6","_col9","_col10","_col11"] - Filter Operator [FIL_21] (rows=7276996 width=724) - predicate:(_col12 <> _col3) - Merge Join Operator [MERGEJOIN_290] (rows=7276996 width=724) - Conds:RS_18._col0=RS_322._col1(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col8","_col10","_col11","_col12"] - <-Map 23 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_322] - PartitionCols:_col1 - Please refer to the previous Select Operator [SEL_321] - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_18] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_289] (rows=611379 width=452) - Conds:RS_311._col2=RS_315._col3(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6"] - <-Map 21 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_315] - PartitionCols:_col3 - Please refer to the previous Select Operator [SEL_314] - <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_311] - PartitionCols:_col2 - Please refer to the previous Select Operator [SEL_310] - <-Reducer 2 [SIMPLE_EDGE] + PartitionCols:_col9, _col12 + Merge Join Operator [MERGEJOIN_291] (rows=1222708 width=739) + Conds:RS_28._col11, _col7=RS_319._col0, _col3(Inner),Output:["_col2","_col3","_col6","_col9","_col12","_col13","_col15","_col16","_col17","_col18","_col20","_col21"] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_319] + PartitionCols:_col0, _col3 + Please refer to the previous Select Operator [SEL_318] + <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_28] - PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_288] (rows=76612563 width=382) - Conds:RS_330._col0=RS_303._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col6","_col7","_col8","_col9"] - <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_303] - PartitionCols:_col0 - Select Operator [SEL_301] (rows=7000 width=295) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_299] (rows=7000 width=384) - predicate:(i_color = 'orchid') - Please refer to the previous TableScan [TS_3] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_330] + PartitionCols:_col11, _col7 + Merge Join Operator [MERGEJOIN_290] (rows=76612563 width=735) + Conds:RS_25._col0=RS_26._col1(Inner),Output:["_col2","_col3","_col6","_col7","_col9","_col11","_col12","_col13","_col15","_col16","_col17","_col18"] + <-Reducer 18 [SIMPLE_EDGE] + SHUFFLE [RS_26] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_289] (rows=76612563 width=382) + Conds:RS_328._col0=RS_311._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col6","_col7","_col8","_col9"] + <-Map 19 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_311] + PartitionCols:_col0 + Select Operator [SEL_309] (rows=7000 width=295) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_307] (rows=7000 width=384) + predicate:(i_color = 'orchid') + Please refer to the previous TableScan [TS_9] + <-Map 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_328] + PartitionCols:_col0 + Select Operator [SEL_327] (rows=525333486 width=122) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_326] (rows=525333486 width=122) + predicate:((ss_item_sk BETWEEN DynamicValue(RS_13_item_i_item_sk_min) AND DynamicValue(RS_13_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_13_item_i_item_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_29_store_s_store_sk_min) AND DynamicValue(RS_29_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_29_store_s_store_sk_bloom_filter))) and ss_customer_sk is not null and ss_store_sk is not null) + TableScan [TS_6] (rows=575995635 width=122) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_sales_price"] + <-Reducer 20 [BROADCAST_EDGE] vectorized + BROADCAST [RS_316] + Group By Operator [GBY_315] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 19 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_314] + Group By Operator [GBY_313] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_312] (rows=7000 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_309] + <-Reducer 22 [BROADCAST_EDGE] vectorized + BROADCAST [RS_325] + Group By Operator [GBY_324] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_323] + Group By Operator [GBY_322] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_320] (rows=155 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_318] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_25] PartitionCols:_col0 - Select Operator [SEL_329] (rows=525333486 width=122) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_328] (rows=525333486 width=122) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_29_customer_c_customer_sk_min) AND DynamicValue(RS_29_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_29_customer_c_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_26_item_i_item_sk_min) AND DynamicValue(RS_26_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_26_item_i_item_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_29_store_s_store_sk_min) AND DynamicValue(RS_29_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_29_store_s_store_sk_bloom_filter))) and ss_customer_sk is not null and ss_store_sk is not null) - TableScan [TS_0] (rows=575995635 width=122) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_sales_price"] - <-Reducer 16 [BROADCAST_EDGE] vectorized - BROADCAST [RS_325] - Group By Operator [GBY_324] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=6636187)"] - <-Reducer 15 [CUSTOM_SIMPLE_EDGE] - SHUFFLE [RS_149] - Group By Operator [GBY_148] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=6636187)"] - Select Operator [SEL_147] (rows=7276996 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_22] - <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_327] - Group By Operator [GBY_326] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 15 [CUSTOM_SIMPLE_EDGE] - SHUFFLE [RS_154] - Group By Operator [GBY_153] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_152] (rows=7276996 width=8) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_22] - <-Reducer 8 [BROADCAST_EDGE] vectorized - BROADCAST [RS_308] - Group By Operator [GBY_307] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_306] - Group By Operator [GBY_305] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_304] (rows=7000 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_301] + Filter Operator [FIL_24] (rows=80000000 width=635) + predicate:(_col4 <> _col8) + Merge Join Operator [MERGEJOIN_288] (rows=80000000 width=635) + Conds:RS_301._col1=RS_305._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col6","_col7","_col8"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_301] + PartitionCols:_col1 + Please refer to the previous Select Operator [SEL_300] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_305] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_304] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query25.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query25.q.out index a353c6a128..7325033557 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query25.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query25.q.out @@ -109,213 +109,221 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 12 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) -Map 18 <- Reducer 14 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE) -Reducer 10 <- Map 18 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Map 1 <- Reducer 9 (BROADCAST_EDGE) +Map 19 <- Reducer 14 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE), Reducer 18 (BROADCAST_EDGE), Reducer 6 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE) +Reducer 10 <- Map 19 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) -Reducer 12 <- Reducer 10 (CUSTOM_SIMPLE_EDGE) -Reducer 13 <- Reducer 10 (CUSTOM_SIMPLE_EDGE) +Reducer 12 <- Map 21 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 13 <- Map 22 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) Reducer 14 <- Map 8 (CUSTOM_SIMPLE_EDGE) -Reducer 15 <- Map 19 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 15 <- Map 20 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Reducer 16 <- Reducer 15 (CUSTOM_SIMPLE_EDGE) Reducer 17 <- Reducer 15 (CUSTOM_SIMPLE_EDGE) +Reducer 18 <- Reducer 15 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 3 <- Reducer 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 20 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 21 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 3 <- Reducer 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 7 vectorized - File Output Operator [FS_259] - Limit [LIM_258] (rows=100 width=808) + Reducer 5 vectorized + File Output Operator [FS_250] + Limit [LIM_249] (rows=100 width=808) Number of rows:100 - Select Operator [SEL_257] (rows=4248052806 width=808) + Select Operator [SEL_248] (rows=21091882 width=808) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_256] - Group By Operator [GBY_255] (rows=4248052806 width=808) + <-Reducer 4 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_247] + Group By Operator [GBY_246] (rows=21091882 width=808) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 - <-Reducer 5 [SIMPLE_EDGE] + <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_47] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_46] (rows=4248052806 width=808) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col5)","sum(_col16)","sum(_col10)"],keys:_col19, _col20, _col22, _col23 - Top N Key Operator [TNK_93] (rows=4248052806 width=807) - keys:_col19, _col20, _col22, _col23,sort order:++++,top n:100 - Merge Join Operator [MERGEJOIN_212] (rows=4248052806 width=807) - Conds:RS_42._col3=RS_254._col0(Inner),Output:["_col5","_col10","_col16","_col19","_col20","_col22","_col23"] - <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_254] - PartitionCols:_col0 - Select Operator [SEL_253] (rows=1704 width=192) - Output:["_col0","_col1","_col2"] - TableScan [TS_31] (rows=1704 width=192) - default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id","s_store_name"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_42] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_211] (rows=4248052806 width=623) - Conds:RS_39._col1=RS_252._col0(Inner),Output:["_col3","_col5","_col10","_col16","_col19","_col20"] - <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_252] + Group By Operator [GBY_46] (rows=21091882 width=808) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col13)","sum(_col19)","sum(_col3)"],keys:_col6, _col7, _col22, _col23 + Top N Key Operator [TNK_91] (rows=4248052730 width=807) + keys:_col6, _col7, _col22, _col23,sort order:++++,top n:100 + Merge Join Operator [MERGEJOIN_203] (rows=4248052730 width=807) + Conds:RS_42._col2, _col1=RS_43._col11, _col12(Inner),Output:["_col3","_col6","_col7","_col13","_col19","_col22","_col23"] + <-Reducer 2 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_42] + PartitionCols:_col2, _col1 + Merge Join Operator [MERGEJOIN_197] (rows=54418158 width=119) + Conds:RS_223._col0=RS_210._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_210] PartitionCols:_col0 - Select Operator [SEL_251] (rows=462000 width=288) - Output:["_col0","_col1","_col2"] - TableScan [TS_29] (rows=462000 width=288) - default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_item_desc"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_39] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_210] (rows=4248052806 width=343) - Conds:RS_36._col1, _col2, _col4=RS_37._col6, _col7, _col8(Inner),Output:["_col1","_col3","_col5","_col10","_col16"] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_37] - PartitionCols:_col6, _col7, _col8 - Merge Join Operator [MERGEJOIN_209] (rows=1893811716 width=235) - Conds:RS_25._col2, _col1=RS_26._col1, _col2(Inner),Output:["_col3","_col6","_col7","_col8","_col9"] - <-Reducer 10 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_25] - PartitionCols:_col2, _col1 - Merge Join Operator [MERGEJOIN_207] (rows=54418158 width=119) - Conds:RS_243._col0=RS_221._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_221] - PartitionCols:_col0 - Select Operator [SEL_217] (rows=351 width=4) - Output:["_col0"] - Filter Operator [FIL_214] (rows=351 width=12) - predicate:((d_year = 2000) and d_moy BETWEEN 4 AND 10) - TableScan [TS_3] (rows=73049 width=12) - default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_243] - PartitionCols:_col0 - Select Operator [SEL_242] (rows=285117831 width=123) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_241] (rows=285117831 width=123) - predicate:((cs_bill_customer_sk BETWEEN DynamicValue(RS_26_store_returns_sr_customer_sk_min) AND DynamicValue(RS_26_store_returns_sr_customer_sk_max) and in_bloom_filter(cs_bill_customer_sk, DynamicValue(RS_26_store_returns_sr_customer_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_26_store_returns_sr_item_sk_min) AND DynamicValue(RS_26_store_returns_sr_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_26_store_returns_sr_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_23_d3_d_date_sk_min) AND DynamicValue(RS_23_d3_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_23_d3_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_6] (rows=287989836 width=123) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_net_profit"] - <-Reducer 16 [BROADCAST_EDGE] vectorized - BROADCAST [RS_235] - Group By Operator [GBY_233] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 15 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_106] - Group By Operator [GBY_105] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_104] (rows=9402909 width=8) - Output:["_col0"] - Merge Join Operator [MERGEJOIN_208] (rows=9402909 width=100) - Conds:RS_232._col0=RS_223._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_223] - PartitionCols:_col0 - Select Operator [SEL_218] (rows=351 width=4) - Output:["_col0"] - Filter Operator [FIL_215] (rows=351 width=12) - predicate:((d_year = 2000) and d_moy BETWEEN 4 AND 10) - Please refer to the previous TableScan [TS_3] - <-Map 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_232] - PartitionCols:_col0 - Select Operator [SEL_231] (rows=53632139 width=123) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_230] (rows=53632139 width=123) - predicate:(sr_customer_sk is not null and sr_returned_date_sk is not null) - TableScan [TS_12] (rows=57591150 width=123) - default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number","sr_net_loss"] - <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_240] - Group By Operator [GBY_238] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 15 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_121] - Group By Operator [GBY_120] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_119] (rows=9402909 width=6) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_208] - <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_237] - Group By Operator [GBY_236] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_227] - Group By Operator [GBY_225] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_222] (rows=351 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_217] - <-Reducer 15 [SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_26] - PartitionCols:_col1, _col2 - Please refer to the previous Merge Join Operator [MERGEJOIN_208] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_36] - PartitionCols:_col1, _col2, _col4 - Merge Join Operator [MERGEJOIN_206] (rows=13737330 width=8) - Conds:RS_250._col0=RS_219._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] - <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_219] - PartitionCols:_col0 - Select Operator [SEL_216] (rows=50 width=4) - Output:["_col0"] - Filter Operator [FIL_213] (rows=50 width=12) - predicate:((d_moy = 4) and (d_year = 2000)) - Please refer to the previous TableScan [TS_3] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_250] - PartitionCols:_col0 - Select Operator [SEL_249] (rows=501694138 width=126) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_248] (rows=501694138 width=126) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_25_catalog_sales_cs_bill_customer_sk_min) AND DynamicValue(RS_25_catalog_sales_cs_bill_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_25_catalog_sales_cs_bill_customer_sk_bloom_filter))) and (ss_customer_sk BETWEEN DynamicValue(RS_26_store_returns_sr_customer_sk_min) AND DynamicValue(RS_26_store_returns_sr_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_26_store_returns_sr_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_25_catalog_sales_cs_item_sk_min) AND DynamicValue(RS_25_catalog_sales_cs_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_25_catalog_sales_cs_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_26_store_returns_sr_item_sk_min) AND DynamicValue(RS_26_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_26_store_returns_sr_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_34_d1_d_date_sk_min) AND DynamicValue(RS_34_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_34_d1_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_0] (rows=575995635 width=126) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_net_profit"] - <-Reducer 16 [BROADCAST_EDGE] vectorized - BROADCAST [RS_234] - Please refer to the previous Group By Operator [GBY_233] - <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_239] - Please refer to the previous Group By Operator [GBY_238] - <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_245] - Group By Operator [GBY_244] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 10 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_111] - Group By Operator [GBY_110] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_109] (rows=54418158 width=8) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_207] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_247] - Group By Operator [GBY_246] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 10 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_126] - Group By Operator [GBY_125] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_124] (rows=54418158 width=7) - Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_207] - <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_229] - Group By Operator [GBY_228] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_226] - Group By Operator [GBY_224] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_220] (rows=50 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_216] + Select Operator [SEL_207] (rows=351 width=4) + Output:["_col0"] + Filter Operator [FIL_204] (rows=351 width=12) + predicate:((d_year = 2000) and d_moy BETWEEN 4 AND 10) + TableScan [TS_3] (rows=73049 width=12) + default@date_dim,d3,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_223] + PartitionCols:_col0 + Select Operator [SEL_222] (rows=285117831 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_221] (rows=285117831 width=123) + predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_40_d3_d_date_sk_min) AND DynamicValue(RS_40_d3_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_40_d3_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_0] (rows=287989836 width=123) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_net_profit"] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_220] + Group By Operator [GBY_219] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_217] + Group By Operator [GBY_215] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_211] (rows=351 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_207] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_43] + PartitionCols:_col11, _col12 + Select Operator [SEL_38] (rows=21091882 width=620) + Output:["_col1","_col2","_col8","_col11","_col12","_col14","_col17","_col18"] + Merge Join Operator [MERGEJOIN_202] (rows=21091882 width=620) + Conds:RS_35._col3=RS_245._col0(Inner),Output:["_col5","_col8","_col9","_col11","_col14","_col15","_col17","_col18"] + <-Map 22 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_245] + PartitionCols:_col0 + Select Operator [SEL_244] (rows=1704 width=192) + Output:["_col0","_col1","_col2"] + TableScan [TS_24] (rows=1704 width=192) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id","s_store_name"] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_35] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_201] (rows=21091882 width=434) + Conds:RS_32._col1=RS_243._col0(Inner),Output:["_col3","_col5","_col8","_col9","_col11","_col14","_col15"] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_243] + PartitionCols:_col0 + Select Operator [SEL_242] (rows=462000 width=288) + Output:["_col0","_col1","_col2"] + TableScan [TS_22] (rows=462000 width=288) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_item_desc"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_32] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_200] (rows=21091882 width=154) + Conds:RS_29._col1, _col2, _col4=RS_30._col1, _col2, _col3(Inner),Output:["_col1","_col3","_col5","_col8","_col9","_col11"] + <-Reducer 15 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_30] + PartitionCols:_col1, _col2, _col3 + Merge Join Operator [MERGEJOIN_199] (rows=9402909 width=100) + Conds:RS_228._col0=RS_214._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_214] + PartitionCols:_col0 + Select Operator [SEL_209] (rows=351 width=4) + Output:["_col0"] + Filter Operator [FIL_206] (rows=351 width=12) + predicate:((d_year = 2000) and d_moy BETWEEN 4 AND 10) + Please refer to the previous TableScan [TS_3] + <-Map 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_228] + PartitionCols:_col0 + Select Operator [SEL_227] (rows=53632139 width=123) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_226] (rows=53632139 width=123) + predicate:(sr_customer_sk is not null and sr_returned_date_sk is not null) + TableScan [TS_12] (rows=57591150 width=123) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number","sr_net_loss"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_29] + PartitionCols:_col1, _col2, _col4 + Merge Join Operator [MERGEJOIN_198] (rows=13737330 width=8) + Conds:RS_241._col0=RS_212._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_212] + PartitionCols:_col0 + Select Operator [SEL_208] (rows=50 width=4) + Output:["_col0"] + Filter Operator [FIL_205] (rows=50 width=12) + predicate:((d_moy = 4) and (d_year = 2000)) + Please refer to the previous TableScan [TS_3] + <-Map 19 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_241] + PartitionCols:_col0 + Select Operator [SEL_240] (rows=501694138 width=126) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_239] (rows=501694138 width=126) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_30_store_returns_sr_customer_sk_min) AND DynamicValue(RS_30_store_returns_sr_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_30_store_returns_sr_customer_sk_bloom_filter))) and (ss_customer_sk BETWEEN DynamicValue(RS_42_catalog_sales_cs_bill_customer_sk_min) AND DynamicValue(RS_42_catalog_sales_cs_bill_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_42_catalog_sales_cs_bill_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_30_store_returns_sr_item_sk_min) AND DynamicValue(RS_30_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_30_store_returns_sr_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_42_catalog_sales_cs_item_sk_min) AND DynamicValue(RS_42_catalog_sales_cs_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_42_catalog_sales_cs_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_27_d1_d_date_sk_min) AND DynamicValue(RS_27_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_27_d1_d_date_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_30_store_returns_sr_ticket_number_min) AND DynamicValue(RS_30_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_30_store_returns_sr_ticket_number_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_6] (rows=575995635 width=126) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_net_profit"] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_225] + Group By Operator [GBY_224] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_218] + Group By Operator [GBY_216] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_213] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_208] + <-Reducer 16 [BROADCAST_EDGE] vectorized + BROADCAST [RS_230] + Group By Operator [GBY_229] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 15 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_119] + Group By Operator [GBY_118] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_117] (rows=9402909 width=8) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_199] + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_232] + Group By Operator [GBY_231] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 15 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_124] + Group By Operator [GBY_123] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_122] (rows=9402909 width=6) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_199] + <-Reducer 18 [BROADCAST_EDGE] vectorized + BROADCAST [RS_234] + Group By Operator [GBY_233] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=6124198)"] + <-Reducer 15 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_129] + Group By Operator [GBY_128] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=6124198)"] + Select Operator [SEL_127] (rows=9402909 width=8) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_199] + <-Reducer 6 [BROADCAST_EDGE] vectorized + BROADCAST [RS_236] + Group By Operator [GBY_235] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_144] + Group By Operator [GBY_143] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_142] (rows=54418158 width=8) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_197] + <-Reducer 7 [BROADCAST_EDGE] vectorized + BROADCAST [RS_238] + Group By Operator [GBY_237] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_149] + Group By Operator [GBY_148] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_147] (rows=54418158 width=7) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_197] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query54.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query54.q.out index 0210163c8a..21193d658d 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query54.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query54.q.out @@ -1,7 +1,7 @@ -Warning: Shuffle Join MERGEJOIN[270][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product -Warning: Shuffle Join MERGEJOIN[271][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 5' is a cross product -Warning: Shuffle Join MERGEJOIN[269][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 32' is a cross product -Warning: Shuffle Join MERGEJOIN[272][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[264][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[273][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[272][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 14' is a cross product +Warning: Shuffle Join MERGEJOIN[274][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 6' is a cross product PREHOOK: query: explain with my_customers as ( select distinct c_customer_sk @@ -133,29 +133,29 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 14 (BROADCAST_EDGE) -Map 16 <- Reducer 24 (BROADCAST_EDGE), Reducer 26 (BROADCAST_EDGE), Union 17 (CONTAINS) -Map 22 <- Reducer 24 (BROADCAST_EDGE), Reducer 26 (BROADCAST_EDGE), Union 17 (CONTAINS) -Reducer 12 <- Map 11 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) -Reducer 13 <- Reducer 12 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) -Reducer 14 <- Reducer 13 (CUSTOM_SIMPLE_EDGE) -Reducer 18 <- Map 23 (SIMPLE_EDGE), Union 17 (SIMPLE_EDGE) -Reducer 19 <- Map 25 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE) -Reducer 20 <- Map 27 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) -Reducer 21 <- Reducer 20 (SIMPLE_EDGE) -Reducer 24 <- Map 23 (CUSTOM_SIMPLE_EDGE) -Reducer 26 <- Map 25 (CUSTOM_SIMPLE_EDGE) -Reducer 29 <- Map 28 (SIMPLE_EDGE) -Reducer 3 <- Reducer 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 30 <- Reducer 29 (CUSTOM_SIMPLE_EDGE) -Reducer 31 <- Map 28 (SIMPLE_EDGE) -Reducer 32 <- Reducer 31 (CUSTOM_SIMPLE_EDGE), Reducer 34 (CUSTOM_SIMPLE_EDGE) -Reducer 33 <- Map 28 (SIMPLE_EDGE) -Reducer 34 <- Reducer 33 (CUSTOM_SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE), Reducer 30 (CUSTOM_SIMPLE_EDGE) -Reducer 5 <- Reducer 29 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) -Reducer 6 <- Reducer 32 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) +Map 1 <- Reducer 21 (BROADCAST_EDGE) +Map 23 <- Reducer 31 (BROADCAST_EDGE), Reducer 33 (BROADCAST_EDGE), Union 24 (CONTAINS) +Map 29 <- Reducer 31 (BROADCAST_EDGE), Reducer 33 (BROADCAST_EDGE), Union 24 (CONTAINS) +Reducer 11 <- Map 10 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (CUSTOM_SIMPLE_EDGE) +Reducer 13 <- Map 10 (SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (CUSTOM_SIMPLE_EDGE), Reducer 16 (CUSTOM_SIMPLE_EDGE) +Reducer 15 <- Map 10 (SIMPLE_EDGE) +Reducer 16 <- Reducer 15 (CUSTOM_SIMPLE_EDGE) +Reducer 19 <- Map 18 (SIMPLE_EDGE), Map 22 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 12 (CUSTOM_SIMPLE_EDGE) +Reducer 20 <- Reducer 19 (SIMPLE_EDGE), Reducer 28 (SIMPLE_EDGE) +Reducer 21 <- Reducer 20 (CUSTOM_SIMPLE_EDGE) +Reducer 25 <- Map 30 (SIMPLE_EDGE), Union 24 (SIMPLE_EDGE) +Reducer 26 <- Map 32 (SIMPLE_EDGE), Reducer 25 (SIMPLE_EDGE) +Reducer 27 <- Map 34 (SIMPLE_EDGE), Reducer 26 (SIMPLE_EDGE) +Reducer 28 <- Reducer 27 (SIMPLE_EDGE) +Reducer 3 <- Map 17 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 31 <- Map 30 (CUSTOM_SIMPLE_EDGE) +Reducer 33 <- Map 32 (CUSTOM_SIMPLE_EDGE) +Reducer 4 <- Reducer 20 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 11 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Reducer 14 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) Reducer 8 <- Reducer 7 (SIMPLE_EDGE) Reducer 9 <- Reducer 8 (SIMPLE_EDGE) @@ -165,25 +165,25 @@ Stage-0 limit:100 Stage-1 Reducer 9 vectorized - File Output Operator [FS_352] - Limit [LIM_351] (rows=1 width=16) + File Output Operator [FS_354] + Limit [LIM_353] (rows=1 width=16) Number of rows:100 - Select Operator [SEL_350] (rows=1 width=16) + Select Operator [SEL_352] (rows=1 width=16) Output:["_col0","_col1","_col2"] <-Reducer 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_349] - Select Operator [SEL_348] (rows=1 width=16) + SHUFFLE [RS_351] + Select Operator [SEL_350] (rows=1 width=16) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_347] (rows=1 width=12) + Group By Operator [GBY_349] (rows=1 width=12) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Reducer 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_346] + SHUFFLE [RS_348] PartitionCols:_col0 - Group By Operator [GBY_345] (rows=1 width=12) + Group By Operator [GBY_347] (rows=1 width=12) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Select Operator [SEL_344] (rows=1 width=116) + Select Operator [SEL_346] (rows=1 width=116) Output:["_col0"] - Group By Operator [GBY_343] (rows=1 width=116) + Group By Operator [GBY_345] (rows=1 width=116) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_118] @@ -196,17 +196,17 @@ Stage-0 predicate:_col2 BETWEEN _col3 AND _col4 Select Operator [SEL_114] (rows=5618315000 width=127) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_272] (rows=5618315000 width=127) + Merge Join Operator [MERGEJOIN_274] (rows=5618315000 width=127) Conds:(Inner),Output:["_col0","_col2","_col6","_col13","_col15"] - <-Reducer 32 [CUSTOM_SIMPLE_EDGE] + <-Reducer 14 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_111] - Merge Join Operator [MERGEJOIN_269] (rows=25 width=4) + Merge Join Operator [MERGEJOIN_272] (rows=25 width=4) Conds:(Right Outer),Output:["_col0"] - <-Reducer 31 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_334] - Group By Operator [GBY_333] (rows=25 width=4) + <-Reducer 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_336] + Group By Operator [GBY_335] (rows=25 width=4) Output:["_col0"],keys:KEY._col0 - <-Map 28 [SIMPLE_EDGE] vectorized + <-Map 10 [SIMPLE_EDGE] vectorized SHUFFLE [RS_322] PartitionCols:_col0 Group By Operator [GBY_319] (rows=25 width=4) @@ -215,23 +215,23 @@ Stage-0 Output:["_col0"] Filter Operator [FIL_314] (rows=50 width=12) predicate:((d_moy = 3) and (d_year = 1999)) - TableScan [TS_72] (rows=73049 width=12) + TableScan [TS_26] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_month_seq","d_year","d_moy"] - <-Reducer 34 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_342] - Select Operator [SEL_341] (rows=1 width=8) - Filter Operator [FIL_340] (rows=1 width=8) + <-Reducer 16 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_344] + Select Operator [SEL_343] (rows=1 width=8) + Filter Operator [FIL_342] (rows=1 width=8) predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_339] (rows=1 width=8) + Group By Operator [GBY_341] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 33 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_338] - Group By Operator [GBY_337] (rows=1 width=8) + <-Reducer 15 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_340] + Group By Operator [GBY_339] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_336] (rows=25 width=4) - Group By Operator [GBY_335] (rows=25 width=4) + Select Operator [SEL_338] (rows=25 width=4) + Group By Operator [GBY_337] (rows=25 width=4) Output:["_col0"],keys:KEY._col0 - <-Map 28 [SIMPLE_EDGE] vectorized + <-Map 10 [SIMPLE_EDGE] vectorized SHUFFLE [RS_323] PartitionCols:_col0 Group By Operator [GBY_320] (rows=25 width=4) @@ -243,13 +243,13 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_112] Select Operator [SEL_107] (rows=224732600 width=119) Output:["_col0","_col4","_col11","_col13"] - Merge Join Operator [MERGEJOIN_271] (rows=224732600 width=119) - Conds:(Left Outer),Output:["_col2","_col4","_col10","_col13"] - <-Reducer 29 [CUSTOM_SIMPLE_EDGE] vectorized + Merge Join Operator [MERGEJOIN_273] (rows=224732600 width=119) + Conds:(Left Outer),Output:["_col2","_col5","_col11","_col13"] + <-Reducer 11 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_326] Group By Operator [GBY_324] (rows=25 width=4) Output:["_col0"],keys:KEY._col0 - <-Map 28 [SIMPLE_EDGE] vectorized + <-Map 10 [SIMPLE_EDGE] vectorized SHUFFLE [RS_321] PartitionCols:_col0 Group By Operator [GBY_318] (rows=25 width=4) @@ -259,181 +259,181 @@ Stage-0 Please refer to the previous Filter Operator [FIL_314] <-Reducer 4 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_104] - Merge Join Operator [MERGEJOIN_270] (rows=8989304 width=8) - Conds:(Inner),Output:["_col2","_col4","_col10"] - <-Reducer 3 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_101] - Merge Join Operator [MERGEJOIN_268] (rows=8989304 width=8) - Conds:RS_98._col1=RS_99._col5(Inner),Output:["_col2","_col4","_col10"] - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_99] - PartitionCols:_col5 - Merge Join Operator [MERGEJOIN_267] (rows=55046 width=4) - Conds:RS_68._col0=RS_306._col1(Inner),Output:["_col5"] - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_68] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_263] (rows=39720279 width=4) - Conds:RS_285._col1, _col2=RS_288._col0, _col1(Inner),Output:["_col0"] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_285] - PartitionCols:_col1, _col2 - Select Operator [SEL_284] (rows=40000000 width=188) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_283] (rows=40000000 width=188) - predicate:(ca_county is not null and ca_state is not null) - TableScan [TS_28] (rows=40000000 width=188) - default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county","ca_state"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_288] - PartitionCols:_col0, _col1 - Select Operator [SEL_287] (rows=1704 width=184) - Output:["_col0","_col1"] - Filter Operator [FIL_286] (rows=1704 width=184) - predicate:(s_county is not null and s_state is not null) - TableScan [TS_31] (rows=1704 width=184) - default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_county","s_state"] - <-Reducer 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_306] - PartitionCols:_col1 - Select Operator [SEL_305] (rows=55046 width=8) + Merge Join Operator [MERGEJOIN_271] (rows=8989304 width=8) + Conds:RS_101._col1=RS_102._col5(Inner),Output:["_col2","_col5","_col11"] + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_102] + PartitionCols:_col5 + Merge Join Operator [MERGEJOIN_270] (rows=55046 width=4) + Conds:RS_83._col0=RS_308._col1(Inner),Output:["_col5"] + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_83] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_266] (rows=39720279 width=4) + Conds:RS_287._col1, _col2=RS_290._col0, _col1(Inner),Output:["_col0"] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_287] + PartitionCols:_col1, _col2 + Select Operator [SEL_286] (rows=40000000 width=188) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_285] (rows=40000000 width=188) + predicate:(ca_county is not null and ca_state is not null) + TableScan [TS_43] (rows=40000000 width=188) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county","ca_state"] + <-Map 22 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_290] + PartitionCols:_col0, _col1 + Select Operator [SEL_289] (rows=1704 width=184) Output:["_col0","_col1"] - Group By Operator [GBY_304] (rows=55046 width=8) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_62] - PartitionCols:_col0, _col1 - Group By Operator [GBY_61] (rows=55046 width=8) - Output:["_col0","_col1"],keys:_col6, _col5 - Merge Join Operator [MERGEJOIN_266] (rows=110092 width=8) - Conds:RS_57._col1=RS_303._col0(Inner),Output:["_col5","_col6"] - <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_303] + Filter Operator [FIL_288] (rows=1704 width=184) + predicate:(s_county is not null and s_state is not null) + TableScan [TS_46] (rows=1704 width=184) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_county","s_state"] + <-Reducer 28 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_308] + PartitionCols:_col1 + Select Operator [SEL_307] (rows=55046 width=8) + Output:["_col0","_col1"] + Group By Operator [GBY_306] (rows=55046 width=8) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 27 [SIMPLE_EDGE] + SHUFFLE [RS_77] + PartitionCols:_col0, _col1 + Group By Operator [GBY_76] (rows=55046 width=8) + Output:["_col0","_col1"],keys:_col6, _col5 + Merge Join Operator [MERGEJOIN_269] (rows=110092 width=8) + Conds:RS_72._col1=RS_305._col0(Inner),Output:["_col5","_col6"] + <-Map 34 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_305] + PartitionCols:_col0 + Select Operator [SEL_304] (rows=80000000 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_303] (rows=80000000 width=8) + predicate:c_current_addr_sk is not null + TableScan [TS_63] (rows=80000000 width=8) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk"] + <-Reducer 26 [SIMPLE_EDGE] + SHUFFLE [RS_72] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_268] (rows=110092 width=0) + Conds:RS_69._col2=RS_299._col0(Inner),Output:["_col1"] + <-Map 32 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_299] PartitionCols:_col0 - Select Operator [SEL_302] (rows=80000000 width=8) - Output:["_col0","_col1"] - Filter Operator [FIL_301] (rows=80000000 width=8) - predicate:c_current_addr_sk is not null - TableScan [TS_48] (rows=80000000 width=8) - default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk"] - <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_57] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_265] (rows=110092 width=0) - Conds:RS_54._col2=RS_297._col0(Inner),Output:["_col1"] - <-Map 25 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_297] + Select Operator [SEL_298] (rows=453 width=4) + Output:["_col0"] + Filter Operator [FIL_297] (rows=453 width=186) + predicate:((i_category = 'Jewelry') and (i_class = 'consignment')) + TableScan [TS_60] (rows=462000 width=186) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_class","i_category"] + <-Reducer 25 [SIMPLE_EDGE] + SHUFFLE [RS_69] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_267] (rows=11665117 width=7) + Conds:Union 24._col0=RS_293._col0(Inner),Output:["_col1","_col2"] + <-Map 30 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_293] PartitionCols:_col0 - Select Operator [SEL_296] (rows=453 width=4) + Select Operator [SEL_292] (rows=50 width=4) Output:["_col0"] - Filter Operator [FIL_295] (rows=453 width=186) - predicate:((i_category = 'Jewelry') and (i_class = 'consignment')) - TableScan [TS_45] (rows=462000 width=186) - default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_class","i_category"] - <-Reducer 18 [SIMPLE_EDGE] - SHUFFLE [RS_54] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_264] (rows=11665117 width=7) - Conds:Union 17._col0=RS_291._col0(Inner),Output:["_col1","_col2"] - <-Map 23 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_291] - PartitionCols:_col0 - Select Operator [SEL_290] (rows=50 width=4) - Output:["_col0"] - Filter Operator [FIL_289] (rows=50 width=12) - predicate:((d_moy = 3) and (d_year = 1999)) - TableScan [TS_42] (rows=73049 width=12) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Union 17 [SIMPLE_EDGE] - <-Map 16 [CONTAINS] vectorized - Reduce Output Operator [RS_361] - PartitionCols:_col0 - Select Operator [SEL_360] (rows=285117831 width=11) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_359] (rows=285117831 width=11) - predicate:((cs_item_sk BETWEEN DynamicValue(RS_55_item_i_item_sk_min) AND DynamicValue(RS_55_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_55_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_52_date_dim_d_date_sk_min) AND DynamicValue(RS_52_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_52_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_273] (rows=287989836 width=11) - Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk"] - <-Reducer 24 [BROADCAST_EDGE] vectorized - BROADCAST [RS_354] - Group By Operator [GBY_353] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 23 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_294] - Group By Operator [GBY_293] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_292] (rows=50 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_290] - <-Reducer 26 [BROADCAST_EDGE] vectorized - BROADCAST [RS_357] - Group By Operator [GBY_356] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 25 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_300] - Group By Operator [GBY_299] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_298] (rows=453 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_296] - <-Map 22 [CONTAINS] vectorized - Reduce Output Operator [RS_364] - PartitionCols:_col0 - Select Operator [SEL_363] (rows=143930993 width=11) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_362] (rows=143930993 width=11) - predicate:((ws_item_sk BETWEEN DynamicValue(RS_55_item_i_item_sk_min) AND DynamicValue(RS_55_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_55_item_i_item_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_52_date_dim_d_date_sk_min) AND DynamicValue(RS_52_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_52_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_278] (rows=144002668 width=11) - Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk"] - <-Reducer 24 [BROADCAST_EDGE] vectorized - BROADCAST [RS_355] - Please refer to the previous Group By Operator [GBY_353] - <-Reducer 26 [BROADCAST_EDGE] vectorized - BROADCAST [RS_358] - Please refer to the previous Group By Operator [GBY_356] + Filter Operator [FIL_291] (rows=50 width=12) + predicate:((d_moy = 3) and (d_year = 1999)) + TableScan [TS_57] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Union 24 [SIMPLE_EDGE] + <-Map 23 [CONTAINS] vectorized + Reduce Output Operator [RS_363] + PartitionCols:_col0 + Select Operator [SEL_362] (rows=285117831 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_361] (rows=285117831 width=11) + predicate:((cs_item_sk BETWEEN DynamicValue(RS_70_item_i_item_sk_min) AND DynamicValue(RS_70_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_70_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_67_date_dim_d_date_sk_min) AND DynamicValue(RS_67_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_67_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_275] (rows=287989836 width=11) + Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk"] + <-Reducer 31 [BROADCAST_EDGE] vectorized + BROADCAST [RS_356] + Group By Operator [GBY_355] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 30 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_296] + Group By Operator [GBY_295] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_294] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_292] + <-Reducer 33 [BROADCAST_EDGE] vectorized + BROADCAST [RS_359] + Group By Operator [GBY_358] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 32 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_302] + Group By Operator [GBY_301] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_300] (rows=453 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_298] + <-Map 29 [CONTAINS] vectorized + Reduce Output Operator [RS_366] + PartitionCols:_col0 + Select Operator [SEL_365] (rows=143930993 width=11) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_364] (rows=143930993 width=11) + predicate:((ws_item_sk BETWEEN DynamicValue(RS_70_item_i_item_sk_min) AND DynamicValue(RS_70_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_70_item_i_item_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_67_date_dim_d_date_sk_min) AND DynamicValue(RS_67_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_67_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) + TableScan [TS_280] (rows=144002668 width=11) + Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk"] + <-Reducer 31 [BROADCAST_EDGE] vectorized + BROADCAST [RS_357] + Please refer to the previous Group By Operator [GBY_355] + <-Reducer 33 [BROADCAST_EDGE] vectorized + BROADCAST [RS_360] + Please refer to the previous Group By Operator [GBY_358] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_101] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_265] (rows=525327388 width=114) + Conds:RS_98._col0=RS_334._col0(Inner),Output:["_col1","_col2","_col5"] + <-Map 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_334] + PartitionCols:_col0 + Select Operator [SEL_333] (rows=73049 width=8) + Output:["_col0","_col1"] + TableScan [TS_41] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_98] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_262] (rows=525327388 width=114) - Conds:RS_311._col0=RS_313._col0(Inner),Output:["_col1","_col2","_col4"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_311] - PartitionCols:_col0 - Select Operator [SEL_310] (rows=525327388 width=114) + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_264] (rows=525327388 width=114) + Conds:(Inner),Output:["_col0","_col1","_col2"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_313] + Select Operator [SEL_312] (rows=525327388 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_309] (rows=525327388 width=114) - predicate:((ss_customer_sk BETWEEN DynamicValue(RS_99_customer_c_customer_sk_min) AND DynamicValue(RS_99_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_99_customer_c_customer_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) + Filter Operator [FIL_311] (rows=525327388 width=114) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_102_customer_c_customer_sk_min) AND DynamicValue(RS_102_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_102_customer_c_customer_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) TableScan [TS_23] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_sales_price"] - <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_308] - Group By Operator [GBY_307] (rows=1 width=12) + <-Reducer 21 [BROADCAST_EDGE] vectorized + BROADCAST [RS_310] + Group By Operator [GBY_309] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 13 [CUSTOM_SIMPLE_EDGE] - SHUFFLE [RS_182] - Group By Operator [GBY_181] (rows=1 width=12) + <-Reducer 20 [CUSTOM_SIMPLE_EDGE] + SHUFFLE [RS_184] + Group By Operator [GBY_183] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_180] (rows=55046 width=8) + Select Operator [SEL_182] (rows=55046 width=8) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_267] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_313] - PartitionCols:_col0 - Select Operator [SEL_312] (rows=73049 width=8) - Output:["_col0","_col1"] - TableScan [TS_26] (rows=73049 width=8) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] - <-Reducer 30 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_332] - Select Operator [SEL_331] (rows=1 width=8) - Filter Operator [FIL_330] (rows=1 width=8) - predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_329] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 29 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_328] - Group By Operator [GBY_327] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_325] (rows=25 width=4) - Please refer to the previous Group By Operator [GBY_324] + Please refer to the previous Merge Join Operator [MERGEJOIN_270] + <-Reducer 12 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_332] + Select Operator [SEL_331] (rows=1 width=8) + Filter Operator [FIL_330] (rows=1 width=8) + predicate:(sq_count_check(_col0) <= 1) + Group By Operator [GBY_329] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 11 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_328] + Group By Operator [GBY_327] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_325] (rows=25 width=4) + Please refer to the previous Group By Operator [GBY_324] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query64.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query64.q.out index aebd6b4137..d741dfd1ee 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query64.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query64.q.out @@ -265,494 +265,502 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 33 <- Reducer 29 (BROADCAST_EDGE), Reducer 36 (BROADCAST_EDGE), Reducer 42 (BROADCAST_EDGE) -Map 39 <- Reducer 36 (BROADCAST_EDGE) -Map 50 <- Reducer 12 (BROADCAST_EDGE), Reducer 32 (BROADCAST_EDGE), Reducer 38 (BROADCAST_EDGE), Reducer 46 (BROADCAST_EDGE) -Map 51 <- Reducer 38 (BROADCAST_EDGE) -Reducer 10 <- Reducer 15 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 11 <- Reducer 10 (SIMPLE_EDGE) -Reducer 12 <- Reducer 9 (CUSTOM_SIMPLE_EDGE) -Reducer 13 <- Reducer 20 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 14 <- Map 49 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) +Map 25 <- Reducer 30 (BROADCAST_EDGE), Reducer 36 (BROADCAST_EDGE), Reducer 42 (BROADCAST_EDGE) +Map 39 <- Reducer 30 (BROADCAST_EDGE) +Map 51 <- Reducer 16 (BROADCAST_EDGE), Reducer 34 (BROADCAST_EDGE), Reducer 37 (BROADCAST_EDGE), Reducer 46 (BROADCAST_EDGE) +Map 52 <- Reducer 34 (BROADCAST_EDGE) +Reducer 10 <- Map 48 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Map 49 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Map 50 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) Reducer 15 <- Reducer 14 (SIMPLE_EDGE) -Reducer 17 <- Map 16 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE) -Reducer 18 <- Map 48 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) -Reducer 19 <- Map 16 (SIMPLE_EDGE), Reducer 25 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE) -Reducer 20 <- Map 48 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) -Reducer 22 <- Map 21 (SIMPLE_EDGE), Reducer 28 (SIMPLE_EDGE) -Reducer 23 <- Map 47 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) -Reducer 24 <- Map 21 (SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE) -Reducer 25 <- Map 47 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) -Reducer 27 <- Map 26 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE) -Reducer 28 <- Reducer 27 (SIMPLE_EDGE), Reducer 41 (ONE_TO_ONE_EDGE) -Reducer 29 <- Map 26 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Map 21 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 30 <- Map 26 (SIMPLE_EDGE), Reducer 37 (SIMPLE_EDGE) -Reducer 31 <- Reducer 30 (SIMPLE_EDGE), Reducer 45 (ONE_TO_ONE_EDGE) -Reducer 32 <- Map 26 (CUSTOM_SIMPLE_EDGE) -Reducer 34 <- Map 33 (SIMPLE_EDGE), Map 35 (SIMPLE_EDGE) +Reducer 16 <- Reducer 13 (CUSTOM_SIMPLE_EDGE) +Reducer 17 <- Reducer 33 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 18 <- Map 38 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) +Reducer 19 <- Reducer 18 (SIMPLE_EDGE), Reducer 45 (ONE_TO_ONE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 49 (SIMPLE_EDGE) +Reducer 20 <- Map 47 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Reducer 21 <- Map 48 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 22 <- Map 49 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) +Reducer 23 <- Map 50 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) +Reducer 24 <- Reducer 23 (SIMPLE_EDGE) +Reducer 26 <- Map 25 (SIMPLE_EDGE), Map 29 (SIMPLE_EDGE) +Reducer 27 <- Map 35 (SIMPLE_EDGE), Reducer 26 (SIMPLE_EDGE) +Reducer 28 <- Map 38 (SIMPLE_EDGE), Reducer 27 (SIMPLE_EDGE) +Reducer 3 <- Map 47 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 30 <- Map 29 (CUSTOM_SIMPLE_EDGE) +Reducer 31 <- Map 29 (SIMPLE_EDGE), Map 51 (SIMPLE_EDGE) +Reducer 32 <- Map 35 (SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE) +Reducer 33 <- Map 38 (SIMPLE_EDGE), Reducer 32 (SIMPLE_EDGE) +Reducer 34 <- Map 29 (CUSTOM_SIMPLE_EDGE) Reducer 36 <- Map 35 (CUSTOM_SIMPLE_EDGE) -Reducer 37 <- Map 35 (SIMPLE_EDGE), Map 50 (SIMPLE_EDGE) -Reducer 38 <- Map 35 (CUSTOM_SIMPLE_EDGE) -Reducer 4 <- Map 26 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 37 <- Map 35 (CUSTOM_SIMPLE_EDGE) +Reducer 4 <- Map 35 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 40 <- Map 39 (SIMPLE_EDGE), Map 43 (SIMPLE_EDGE) Reducer 41 <- Reducer 40 (SIMPLE_EDGE) Reducer 42 <- Reducer 41 (CUSTOM_SIMPLE_EDGE) -Reducer 44 <- Map 43 (SIMPLE_EDGE), Map 51 (SIMPLE_EDGE) +Reducer 44 <- Map 43 (SIMPLE_EDGE), Map 52 (SIMPLE_EDGE) Reducer 45 <- Reducer 44 (SIMPLE_EDGE) Reducer 46 <- Reducer 45 (CUSTOM_SIMPLE_EDGE) -Reducer 5 <- Map 26 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Map 49 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 18 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Map 49 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (SIMPLE_EDGE) +Reducer 5 <- Map 35 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 28 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Map 38 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Reducer 41 (ONE_TO_ONE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Map 47 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 11 vectorized - File Output Operator [FS_1001] - Select Operator [SEL_1000] (rows=2169965329 width=1702) + Reducer 15 vectorized + File Output Operator [FS_1045] + Select Operator [SEL_1044] (rows=732552381 width=1702) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20"] - <-Reducer 10 [SIMPLE_EDGE] + <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_199] - Select Operator [SEL_198] (rows=2169965329 width=1694) + Select Operator [SEL_198] (rows=732552381 width=1694) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18"] - Filter Operator [FIL_197] (rows=2169965329 width=1694) + Filter Operator [FIL_197] (rows=732552381 width=1694) predicate:(_col19 <= _col12) - Merge Join Operator [MERGEJOIN_897] (rows=6509895988 width=1694) - Conds:RS_971._col2, _col1, _col3=RS_999._col1, _col0, _col2(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col19","_col20","_col21","_col22"] - <-Reducer 9 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_971] + Merge Join Operator [MERGEJOIN_939] (rows=2197657144 width=1694) + Conds:RS_1015._col2, _col1, _col3=RS_1043._col1, _col0, _col2(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col19","_col20","_col21","_col22"] + <-Reducer 13 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1015] PartitionCols:_col2, _col1, _col3 - Select Operator [SEL_970] (rows=2299138 width=1354) + Select Operator [SEL_1014] (rows=1434227 width=1354) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] - Group By Operator [GBY_969] (rows=2299138 width=1362) + Group By Operator [GBY_1013] (rows=1434227 width=1362) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8, KEY._col9, KEY._col10, KEY._col11, KEY._col12, KEY._col13 - <-Reducer 8 [SIMPLE_EDGE] + <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_94] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Group By Operator [GBY_93] (rows=2299138 width=1362) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count()","sum(_col37)","sum(_col38)","sum(_col39)"],keys:_col26, _col40, _col27, _col7, _col8, _col9, _col10, _col13, _col15, _col21, _col22, _col23, _col24, _col41 - Select Operator [SEL_92] (rows=2331650 width=1292) - Output:["_col7","_col8","_col9","_col10","_col13","_col15","_col21","_col22","_col23","_col24","_col26","_col27","_col37","_col38","_col39","_col40","_col41"] - Filter Operator [FIL_91] (rows=2331650 width=1292) - predicate:(_col45 <> _col17) - Merge Join Operator [MERGEJOIN_881] (rows=2331650 width=1292) - Conds:RS_88._col32=RS_926._col0(Inner),Output:["_col7","_col8","_col9","_col10","_col13","_col15","_col17","_col21","_col22","_col23","_col24","_col26","_col27","_col37","_col38","_col39","_col40","_col41","_col45"] + Group By Operator [GBY_93] (rows=1434227 width=1362) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count()","sum(_col26)","sum(_col27)","sum(_col28)"],keys:_col37, _col29, _col38, _col40, _col41, _col42, _col43, _col7, _col8, _col9, _col10, _col13, _col15, _col30 + Merge Join Operator [MERGEJOIN_923] (rows=2364621 width=1153) + Conds:RS_89._col19, _col25=RS_1011._col0, _col1(Inner),Output:["_col7","_col8","_col9","_col10","_col13","_col15","_col26","_col27","_col28","_col29","_col30","_col37","_col38","_col40","_col41","_col42","_col43"] + <-Map 50 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1011] + PartitionCols:_col0, _col1 + Select Operator [SEL_1010] (rows=57591150 width=8) + Output:["_col0","_col1"] + TableScan [TS_56] (rows=57591150 width=8) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_ticket_number"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_89] + PartitionCols:_col19, _col25 + Merge Join Operator [MERGEJOIN_922] (rows=1434227 width=1030) + Conds:RS_86._col23=RS_944._col0(Inner),Output:["_col7","_col8","_col9","_col10","_col13","_col15","_col19","_col25","_col26","_col27","_col28","_col29","_col30","_col37","_col38","_col40","_col41","_col42","_col43"] <-Map 49 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_926] + SHUFFLE [RS_944] PartitionCols:_col0 - Select Operator [SEL_925] (rows=1861800 width=89) - Output:["_col0","_col1"] - TableScan [TS_68] (rows=1861800 width=89) - default@customer_demographics,cd1,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status"] - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_88] - PartitionCols:_col32 - Merge Join Operator [MERGEJOIN_880] (rows=2299138 width=1205) - Conds:RS_85._col0=RS_86._col13(Inner),Output:["_col7","_col8","_col9","_col10","_col13","_col15","_col17","_col21","_col22","_col23","_col24","_col26","_col27","_col32","_col37","_col38","_col39","_col40","_col41"] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_85] + Select Operator [SEL_943] (rows=40000000 width=365) + Output:["_col0","_col1","_col2","_col3","_col4"] + TableScan [TS_54] (rows=40000000 width=365) + default@customer_address,ad1,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_street_number","ca_street_name","ca_city","ca_zip"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_86] + PartitionCols:_col23 + Merge Join Operator [MERGEJOIN_921] (rows=1434227 width=669) + Conds:RS_83._col24=RS_1008._col0(Inner),Output:["_col7","_col8","_col9","_col10","_col13","_col15","_col19","_col23","_col25","_col26","_col27","_col28","_col29","_col30","_col37","_col38"] + <-Map 48 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1008] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_871] (rows=70357394 width=458) - Conds:RS_82._col1=RS_927._col0(Inner),Output:["_col0","_col7","_col8","_col9","_col10","_col13","_col15","_col17"] - <-Map 49 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_927] + Select Operator [SEL_1007] (rows=1704 width=181) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_1006] (rows=1704 width=181) + predicate:(s_store_name is not null and s_zip is not null) + TableScan [TS_51] (rows=1704 width=181) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_zip"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_83] + PartitionCols:_col24 + Merge Join Operator [MERGEJOIN_920] (rows=1434227 width=492) + Conds:RS_80._col22=RS_949._col0(Inner),Output:["_col7","_col8","_col9","_col10","_col13","_col15","_col19","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30"] + <-Map 47 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_949] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_925] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_82] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_870] (rows=69376329 width=376) - Conds:RS_79._col4=RS_914._col0(Inner),Output:["_col0","_col1","_col7","_col8","_col9","_col10","_col13","_col15"] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_914] + Select Operator [SEL_948] (rows=7200 width=4) + Output:["_col0"] + Filter Operator [FIL_947] (rows=7200 width=8) + predicate:hd_income_band_sk is not null + TableScan [TS_48] (rows=7200 width=8) + default@household_demographics,hd1,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_income_band_sk"] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_80] + PartitionCols:_col22 + Merge Join Operator [MERGEJOIN_919] (rows=1434227 width=492) + Conds:RS_77._col19=RS_992._col0(Inner),Output:["_col7","_col8","_col9","_col10","_col13","_col15","_col19","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30"] + <-Reducer 41 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_992] PartitionCols:_col0 - Select Operator [SEL_910] (rows=73049 width=8) - Output:["_col0","_col1"] - TableScan [TS_8] (rows=73049 width=8) - default@date_dim,d2,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_79] - PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_869] (rows=69376329 width=376) - Conds:RS_76._col5=RS_913._col0(Inner),Output:["_col0","_col1","_col4","_col7","_col8","_col9","_col10","_col13"] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_913] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_910] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_76] - PartitionCols:_col5 - Merge Join Operator [MERGEJOIN_868] (rows=69376329 width=376) - Conds:RS_73._col2=RS_907._col0(Inner),Output:["_col0","_col1","_col4","_col5","_col7","_col8","_col9","_col10"] - <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_907] + Select Operator [SEL_991] (rows=13257 width=4) + Output:["_col0"] + Filter Operator [FIL_990] (rows=13257 width=228) + predicate:(_col1 > (2 * _col2)) + Group By Operator [GBY_989] (rows=39773 width=228) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 + <-Reducer 40 [SIMPLE_EDGE] + SHUFFLE [RS_44] PartitionCols:_col0 - Select Operator [SEL_906] (rows=7200 width=4) - Output:["_col0"] - Filter Operator [FIL_905] (rows=7200 width=8) - predicate:hd_income_band_sk is not null - TableScan [TS_5] (rows=7200 width=8) - default@household_demographics,hd2,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_income_band_sk"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_73] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_867] (rows=69376329 width=380) - Conds:RS_900._col3=RS_902._col0(Inner),Output:["_col0","_col1","_col2","_col4","_col5","_col7","_col8","_col9","_col10"] - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_902] - PartitionCols:_col0 - Select Operator [SEL_901] (rows=40000000 width=365) - Output:["_col0","_col1","_col2","_col3","_col4"] - TableScan [TS_3] (rows=40000000 width=365) - default@customer_address,ad2,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_street_number","ca_street_name","ca_city","ca_zip"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_900] - PartitionCols:_col3 - Select Operator [SEL_899] (rows=69376329 width=23) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_898] (rows=69376329 width=23) - predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_current_hdemo_sk is not null and c_first_sales_date_sk is not null and c_first_shipto_date_sk is not null) - TableScan [TS_0] (rows=80000000 width=23) - default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_hdemo_sk","c_current_addr_sk","c_first_shipto_date_sk","c_first_sales_date_sk"] - <-Reducer 18 [SIMPLE_EDGE] - SHUFFLE [RS_86] - PartitionCols:_col13 - Select Operator [SEL_67] (rows=2651207 width=784) - Output:["_col3","_col4","_col5","_col6","_col8","_col9","_col13","_col14","_col19","_col20","_col21","_col22","_col23"] - Merge Join Operator [MERGEJOIN_879] (rows=2651207 width=784) - Conds:RS_64._col1, _col7=RS_967._col0, _col1(Inner),Output:["_col2","_col3","_col8","_col9","_col10","_col11","_col12","_col17","_col18","_col20","_col21","_col22","_col23"] - <-Map 48 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_967] - PartitionCols:_col0, _col1 - Select Operator [SEL_966] (rows=57591150 width=8) - Output:["_col0","_col1"] - TableScan [TS_44] (rows=57591150 width=8) - default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_ticket_number"] - <-Reducer 17 [SIMPLE_EDGE] - SHUFFLE [RS_64] - PartitionCols:_col1, _col7 - Merge Join Operator [MERGEJOIN_878] (rows=1608052 width=657) - Conds:RS_61._col5=RS_903._col0(Inner),Output:["_col1","_col2","_col3","_col7","_col8","_col9","_col10","_col11","_col12","_col17","_col18","_col20","_col21","_col22","_col23"] - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_903] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_901] - <-Reducer 23 [SIMPLE_EDGE] - SHUFFLE [RS_61] - PartitionCols:_col5 - Merge Join Operator [MERGEJOIN_877] (rows=1608052 width=296) - Conds:RS_58._col6=RS_964._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col7","_col8","_col9","_col10","_col11","_col12","_col17","_col18"] - <-Map 47 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_964] + Group By Operator [GBY_43] (rows=6482999 width=228) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","sum(_col5)"],keys:_col0 + Merge Join Operator [MERGEJOIN_918] (rows=183085709 width=227) + Conds:RS_985._col0, _col1=RS_987._col0, _col1(Inner),Output:["_col0","_col2","_col5"] + <-Map 43 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_987] + PartitionCols:_col0, _col1 + Select Operator [SEL_986] (rows=28798881 width=120) + Output:["_col0","_col1","_col2"] + TableScan [TS_37] (rows=28798881 width=337) + default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number","cr_refunded_cash","cr_reversed_charge","cr_store_credit"] + <-Map 39 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_985] + PartitionCols:_col0, _col1 + Select Operator [SEL_984] (rows=287989836 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_983] (rows=287989836 width=119) + predicate:(cs_item_sk BETWEEN DynamicValue(RS_24_item_i_item_sk_min) AND DynamicValue(RS_24_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_24_item_i_item_sk_bloom_filter))) + TableScan [TS_35] (rows=287989836 width=119) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_item_sk","cs_order_number","cs_ext_list_price"] + <-Reducer 30 [BROADCAST_EDGE] vectorized + BROADCAST [RS_980] + Group By Operator [GBY_978] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 29 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_976] + Group By Operator [GBY_974] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_971] (rows=518 width=4) + Output:["_col0"] + Select Operator [SEL_969] (rows=518 width=111) + Output:["_col0","_col1"] + Filter Operator [FIL_968] (rows=518 width=312) + predicate:((i_color) IN ('maroon', 'burnished', 'dim', 'steel', 'navajo', 'chocolate') and i_current_price BETWEEN 35 AND 45 and i_current_price BETWEEN 36 AND 50) + TableScan [TS_15] (rows=462000 width=311) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_current_price","i_color","i_product_name"] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_77] + PartitionCols:_col19 + Filter Operator [FIL_76] (rows=1434227 width=662) + predicate:(_col17 <> _col33) + Merge Join Operator [MERGEJOIN_917] (rows=1434227 width=662) + Conds:RS_73._col1=RS_1003._col0(Inner),Output:["_col7","_col8","_col9","_col10","_col13","_col15","_col17","_col19","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col33"] + <-Map 38 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1003] PartitionCols:_col0 - Select Operator [SEL_963] (rows=1704 width=181) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_962] (rows=1704 width=181) - predicate:(s_store_name is not null and s_zip is not null) - TableScan [TS_39] (rows=1704 width=181) - default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_zip"] - <-Reducer 22 [SIMPLE_EDGE] - SHUFFLE [RS_58] - PartitionCols:_col6 - Merge Join Operator [MERGEJOIN_876] (rows=1608052 width=119) - Conds:RS_55._col4=RS_908._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] - <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_908] + Select Operator [SEL_1001] (rows=1861800 width=89) + Output:["_col0","_col1"] + TableScan [TS_21] (rows=1861800 width=89) + default@customer_demographics,cd1,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status"] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_73] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_916] (rows=1414229 width=573) + Conds:RS_70._col0=RS_71._col4(Inner),Output:["_col1","_col7","_col8","_col9","_col10","_col13","_col15","_col17","_col19","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30"] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_70] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_906] - <-Reducer 28 [SIMPLE_EDGE] - SHUFFLE [RS_55] - PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_875] (rows=1608052 width=119) - Conds:RS_52._col1=RS_953._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] - <-Reducer 41 [ONE_TO_ONE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_953] + Merge Join Operator [MERGEJOIN_912] (rows=69376329 width=376) + Conds:RS_67._col4=RS_958._col0(Inner),Output:["_col0","_col1","_col7","_col8","_col9","_col10","_col13","_col15"] + <-Map 35 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_958] PartitionCols:_col0 - Select Operator [SEL_952] (rows=13257 width=4) - Output:["_col0"] - Filter Operator [FIL_951] (rows=13257 width=228) - predicate:(_col1 > (2 * _col2)) - Group By Operator [GBY_950] (rows=39773 width=228) - Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 - <-Reducer 40 [SIMPLE_EDGE] - SHUFFLE [RS_32] - PartitionCols:_col0 - Group By Operator [GBY_31] (rows=6482999 width=228) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","sum(_col5)"],keys:_col0 - Merge Join Operator [MERGEJOIN_874] (rows=183085709 width=227) - Conds:RS_946._col0, _col1=RS_948._col0, _col1(Inner),Output:["_col0","_col2","_col5"] - <-Map 43 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_948] - PartitionCols:_col0, _col1 - Select Operator [SEL_947] (rows=28798881 width=120) - Output:["_col0","_col1","_col2"] - TableScan [TS_25] (rows=28798881 width=337) - default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number","cr_refunded_cash","cr_reversed_charge","cr_store_credit"] - <-Map 39 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_946] - PartitionCols:_col0, _col1 - Select Operator [SEL_945] (rows=287989836 width=119) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_944] (rows=287989836 width=119) - predicate:(cs_item_sk BETWEEN DynamicValue(RS_47_item_i_item_sk_min) AND DynamicValue(RS_47_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_47_item_i_item_sk_bloom_filter))) - TableScan [TS_23] (rows=287989836 width=119) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_item_sk","cs_order_number","cs_ext_list_price"] - <-Reducer 36 [BROADCAST_EDGE] vectorized - BROADCAST [RS_941] - Group By Operator [GBY_939] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 35 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_937] - Group By Operator [GBY_935] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_932] (rows=518 width=4) - Output:["_col0"] - Select Operator [SEL_930] (rows=518 width=111) - Output:["_col0","_col1"] - Filter Operator [FIL_929] (rows=518 width=312) - predicate:((i_color) IN ('maroon', 'burnished', 'dim', 'steel', 'navajo', 'chocolate') and i_current_price BETWEEN 35 AND 45 and i_current_price BETWEEN 36 AND 50) - TableScan [TS_17] (rows=462000 width=311) - default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_current_price","i_color","i_product_name"] - <-Reducer 27 [SIMPLE_EDGE] - SHUFFLE [RS_52] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_873] (rows=1608052 width=119) - Conds:RS_49._col0=RS_917._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_917] - PartitionCols:_col0 - Select Operator [SEL_915] (rows=652 width=4) - Output:["_col0"] - Filter Operator [FIL_911] (rows=652 width=8) - predicate:(d_year = 2000) - Please refer to the previous TableScan [TS_8] - <-Reducer 34 [SIMPLE_EDGE] - SHUFFLE [RS_49] + Select Operator [SEL_954] (rows=73049 width=8) + Output:["_col0","_col1"] + TableScan [TS_18] (rows=73049 width=8) + default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_67] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_911] (rows=69376329 width=376) + Conds:RS_64._col5=RS_957._col0(Inner),Output:["_col0","_col1","_col4","_col7","_col8","_col9","_col10","_col13"] + <-Map 35 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_957] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_872] (rows=4503592 width=119) - Conds:RS_961._col1=RS_931._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] - <-Map 35 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_931] + Select Operator [SEL_953] (rows=73049 width=8) + Output:["_col0","_col1"] + Please refer to the previous TableScan [TS_18] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_64] + PartitionCols:_col5 + Merge Join Operator [MERGEJOIN_910] (rows=69376329 width=376) + Conds:RS_61._col2=RS_950._col0(Inner),Output:["_col0","_col1","_col4","_col5","_col7","_col8","_col9","_col10"] + <-Map 47 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_950] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_930] - <-Map 33 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_961] - PartitionCols:_col1 - Select Operator [SEL_960] (rows=417313408 width=351) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] - Filter Operator [FIL_959] (rows=417313408 width=355) - predicate:((ss_item_sk BETWEEN DynamicValue(RS_47_item_i_item_sk_min) AND DynamicValue(RS_47_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_47_item_i_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_53_catalog_sales_cs_item_sk_min) AND DynamicValue(RS_53_catalog_sales_cs_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_53_catalog_sales_cs_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_50_d1_d_date_sk_min) AND DynamicValue(RS_50_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_50_d1_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_14] (rows=575995635 width=355) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_promo_sk","ss_ticket_number","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] - <-Reducer 36 [BROADCAST_EDGE] vectorized - BROADCAST [RS_940] - Please refer to the previous Group By Operator [GBY_939] - <-Reducer 29 [BROADCAST_EDGE] vectorized - BROADCAST [RS_943] - Group By Operator [GBY_942] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 26 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_923] - Group By Operator [GBY_921] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_918] (rows=652 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_915] - <-Reducer 42 [BROADCAST_EDGE] vectorized - BROADCAST [RS_958] - Group By Operator [GBY_957] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 41 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_956] - Group By Operator [GBY_955] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_954] (rows=13257 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_952] - <-Reducer 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_999] + Please refer to the previous Select Operator [SEL_948] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_61] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_909] (rows=69376329 width=380) + Conds:RS_942._col3=RS_945._col0(Inner),Output:["_col0","_col1","_col2","_col4","_col5","_col7","_col8","_col9","_col10"] + <-Map 49 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_945] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_943] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_942] + PartitionCols:_col3 + Select Operator [SEL_941] (rows=69376329 width=23) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_940] (rows=69376329 width=23) + predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_current_hdemo_sk is not null and c_first_sales_date_sk is not null and c_first_shipto_date_sk is not null) + TableScan [TS_0] (rows=80000000 width=23) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_hdemo_sk","c_current_addr_sk","c_first_shipto_date_sk","c_first_sales_date_sk"] + <-Reducer 28 [SIMPLE_EDGE] + SHUFFLE [RS_71] + PartitionCols:_col4 + Select Operator [SEL_32] (rows=1630791 width=208) + Output:["_col1","_col3","_col4","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] + Merge Join Operator [MERGEJOIN_915] (rows=1630791 width=208) + Conds:RS_29._col3=RS_1002._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15"] + <-Map 38 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1002] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1001] + <-Reducer 27 [SIMPLE_EDGE] + SHUFFLE [RS_29] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_914] (rows=1608052 width=119) + Conds:RS_26._col0=RS_960._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] + <-Map 35 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_960] + PartitionCols:_col0 + Select Operator [SEL_956] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_952] (rows=652 width=8) + predicate:(d_year = 2000) + Please refer to the previous TableScan [TS_18] + <-Reducer 26 [SIMPLE_EDGE] + SHUFFLE [RS_26] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_913] (rows=4503592 width=119) + Conds:RS_1000._col1=RS_970._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] + <-Map 29 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_970] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_969] + <-Map 25 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1000] + PartitionCols:_col1 + Select Operator [SEL_999] (rows=417313408 width=351) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] + Filter Operator [FIL_998] (rows=417313408 width=355) + predicate:((ss_item_sk BETWEEN DynamicValue(RS_24_item_i_item_sk_min) AND DynamicValue(RS_24_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_24_item_i_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_78_catalog_sales_cs_item_sk_min) AND DynamicValue(RS_78_catalog_sales_cs_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_78_catalog_sales_cs_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_27_d1_d_date_sk_min) AND DynamicValue(RS_27_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_27_d1_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_12] (rows=575995635 width=355) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_promo_sk","ss_ticket_number","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] + <-Reducer 30 [BROADCAST_EDGE] vectorized + BROADCAST [RS_979] + Please refer to the previous Group By Operator [GBY_978] + <-Reducer 36 [BROADCAST_EDGE] vectorized + BROADCAST [RS_982] + Group By Operator [GBY_981] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 35 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_966] + Group By Operator [GBY_964] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_961] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_956] + <-Reducer 42 [BROADCAST_EDGE] vectorized + BROADCAST [RS_997] + Group By Operator [GBY_996] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 41 [CUSTOM_SIMPLE_EDGE] vectorized + FORWARD [RS_995] + Group By Operator [GBY_994] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_993] (rows=13257 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_991] + <-Reducer 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1043] PartitionCols:_col1, _col0, _col2 - Select Operator [SEL_998] (rows=2299138 width=525) + Select Operator [SEL_1042] (rows=1434227 width=525) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Group By Operator [GBY_997] (rows=2299138 width=1362) + Group By Operator [GBY_1041] (rows=1434227 width=1362) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8, KEY._col9, KEY._col10, KEY._col11, KEY._col12, KEY._col13 - <-Reducer 14 [SIMPLE_EDGE] + <-Reducer 23 [SIMPLE_EDGE] SHUFFLE [RS_191] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Group By Operator [GBY_190] (rows=2299138 width=1362) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count()","sum(_col37)","sum(_col38)","sum(_col39)"],keys:_col26, _col40, _col27, _col7, _col8, _col9, _col10, _col13, _col15, _col21, _col22, _col23, _col24, _col41 - Select Operator [SEL_189] (rows=2331650 width=1292) - Output:["_col7","_col8","_col9","_col10","_col13","_col15","_col21","_col22","_col23","_col24","_col26","_col27","_col37","_col38","_col39","_col40","_col41"] - Filter Operator [FIL_188] (rows=2331650 width=1292) - predicate:(_col45 <> _col17) - Merge Join Operator [MERGEJOIN_896] (rows=2331650 width=1292) - Conds:RS_185._col32=RS_928._col0(Inner),Output:["_col7","_col8","_col9","_col10","_col13","_col15","_col17","_col21","_col22","_col23","_col24","_col26","_col27","_col37","_col38","_col39","_col40","_col41","_col45"] + Group By Operator [GBY_190] (rows=1434227 width=1362) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count()","sum(_col26)","sum(_col27)","sum(_col28)"],keys:_col37, _col29, _col38, _col40, _col41, _col42, _col43, _col7, _col8, _col9, _col10, _col13, _col15, _col30 + Merge Join Operator [MERGEJOIN_938] (rows=2364621 width=1153) + Conds:RS_186._col19, _col25=RS_1012._col0, _col1(Inner),Output:["_col7","_col8","_col9","_col10","_col13","_col15","_col26","_col27","_col28","_col29","_col30","_col37","_col38","_col40","_col41","_col42","_col43"] + <-Map 50 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1012] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_1010] + <-Reducer 22 [SIMPLE_EDGE] + SHUFFLE [RS_186] + PartitionCols:_col19, _col25 + Merge Join Operator [MERGEJOIN_937] (rows=1434227 width=1030) + Conds:RS_183._col23=RS_946._col0(Inner),Output:["_col7","_col8","_col9","_col10","_col13","_col15","_col19","_col25","_col26","_col27","_col28","_col29","_col30","_col37","_col38","_col40","_col41","_col42","_col43"] <-Map 49 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_928] + SHUFFLE [RS_946] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_925] - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_185] - PartitionCols:_col32 - Merge Join Operator [MERGEJOIN_895] (rows=2299138 width=1205) - Conds:RS_182._col0=RS_183._col13(Inner),Output:["_col7","_col8","_col9","_col10","_col13","_col15","_col17","_col21","_col22","_col23","_col24","_col26","_col27","_col32","_col37","_col38","_col39","_col40","_col41"] - <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_182] + Please refer to the previous Select Operator [SEL_943] + <-Reducer 21 [SIMPLE_EDGE] + SHUFFLE [RS_183] + PartitionCols:_col23 + Merge Join Operator [MERGEJOIN_936] (rows=1434227 width=669) + Conds:RS_180._col24=RS_1009._col0(Inner),Output:["_col7","_col8","_col9","_col10","_col13","_col15","_col19","_col23","_col25","_col26","_col27","_col28","_col29","_col30","_col37","_col38"] + <-Map 48 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1009] PartitionCols:_col0 - Please refer to the previous Merge Join Operator [MERGEJOIN_871] + Please refer to the previous Select Operator [SEL_1007] <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_183] - PartitionCols:_col13 - Select Operator [SEL_164] (rows=2651207 width=784) - Output:["_col3","_col4","_col5","_col6","_col8","_col9","_col13","_col14","_col19","_col20","_col21","_col22","_col23"] - Merge Join Operator [MERGEJOIN_894] (rows=2651207 width=784) - Conds:RS_161._col1, _col7=RS_968._col0, _col1(Inner),Output:["_col2","_col3","_col8","_col9","_col10","_col11","_col12","_col17","_col18","_col20","_col21","_col22","_col23"] - <-Map 48 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_968] - PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_966] - <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_161] - PartitionCols:_col1, _col7 - Merge Join Operator [MERGEJOIN_893] (rows=1608052 width=657) - Conds:RS_158._col5=RS_904._col0(Inner),Output:["_col1","_col2","_col3","_col7","_col8","_col9","_col10","_col11","_col12","_col17","_col18","_col20","_col21","_col22","_col23"] - <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_904] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_901] - <-Reducer 25 [SIMPLE_EDGE] - SHUFFLE [RS_158] - PartitionCols:_col5 - Merge Join Operator [MERGEJOIN_892] (rows=1608052 width=296) - Conds:RS_155._col6=RS_965._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col7","_col8","_col9","_col10","_col11","_col12","_col17","_col18"] - <-Map 47 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_965] + SHUFFLE [RS_180] + PartitionCols:_col24 + Merge Join Operator [MERGEJOIN_935] (rows=1434227 width=492) + Conds:RS_177._col22=RS_951._col0(Inner),Output:["_col7","_col8","_col9","_col10","_col13","_col15","_col19","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30"] + <-Map 47 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_951] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_948] + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_177] + PartitionCols:_col22 + Merge Join Operator [MERGEJOIN_934] (rows=1434227 width=492) + Conds:RS_174._col19=RS_1030._col0(Inner),Output:["_col7","_col8","_col9","_col10","_col13","_col15","_col19","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30"] + <-Reducer 45 [ONE_TO_ONE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1030] + PartitionCols:_col0 + Select Operator [SEL_1029] (rows=13257 width=4) + Output:["_col0"] + Filter Operator [FIL_1028] (rows=13257 width=228) + predicate:(_col1 > (2 * _col2)) + Group By Operator [GBY_1027] (rows=39773 width=228) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 + <-Reducer 44 [SIMPLE_EDGE] + SHUFFLE [RS_141] + PartitionCols:_col0 + Group By Operator [GBY_140] (rows=6482999 width=228) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","sum(_col5)"],keys:_col0 + Merge Join Operator [MERGEJOIN_933] (rows=183085709 width=227) + Conds:RS_1026._col0, _col1=RS_988._col0, _col1(Inner),Output:["_col0","_col2","_col5"] + <-Map 43 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_988] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_986] + <-Map 52 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1026] + PartitionCols:_col0, _col1 + Select Operator [SEL_1025] (rows=287989836 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_1024] (rows=287989836 width=119) + predicate:(cs_item_sk BETWEEN DynamicValue(RS_121_item_i_item_sk_min) AND DynamicValue(RS_121_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_121_item_i_item_sk_bloom_filter))) + TableScan [TS_132] (rows=287989836 width=119) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_item_sk","cs_order_number","cs_ext_list_price"] + <-Reducer 34 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1021] + Group By Operator [GBY_1019] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 29 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_977] + Group By Operator [GBY_975] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_973] (rows=518 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_969] + <-Reducer 18 [SIMPLE_EDGE] + SHUFFLE [RS_174] + PartitionCols:_col19 + Filter Operator [FIL_173] (rows=1434227 width=662) + predicate:(_col17 <> _col33) + Merge Join Operator [MERGEJOIN_932] (rows=1434227 width=662) + Conds:RS_170._col1=RS_1005._col0(Inner),Output:["_col7","_col8","_col9","_col10","_col13","_col15","_col17","_col19","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col33"] + <-Map 38 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1005] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_963] - <-Reducer 24 [SIMPLE_EDGE] - SHUFFLE [RS_155] - PartitionCols:_col6 - Merge Join Operator [MERGEJOIN_891] (rows=1608052 width=119) - Conds:RS_152._col4=RS_909._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] - <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_909] + Please refer to the previous Select Operator [SEL_1001] + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_170] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_931] (rows=1414229 width=573) + Conds:RS_167._col0=RS_168._col4(Inner),Output:["_col1","_col7","_col8","_col9","_col10","_col13","_col15","_col17","_col19","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30"] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_167] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_906] - <-Reducer 31 [SIMPLE_EDGE] - SHUFFLE [RS_152] + Please refer to the previous Merge Join Operator [MERGEJOIN_912] + <-Reducer 33 [SIMPLE_EDGE] + SHUFFLE [RS_168] PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_890] (rows=1608052 width=119) - Conds:RS_149._col1=RS_986._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] - <-Reducer 45 [ONE_TO_ONE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_986] - PartitionCols:_col0 - Select Operator [SEL_985] (rows=13257 width=4) - Output:["_col0"] - Filter Operator [FIL_984] (rows=13257 width=228) - predicate:(_col1 > (2 * _col2)) - Group By Operator [GBY_983] (rows=39773 width=228) - Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 - <-Reducer 44 [SIMPLE_EDGE] - SHUFFLE [RS_129] - PartitionCols:_col0 - Group By Operator [GBY_128] (rows=6482999 width=228) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","sum(_col5)"],keys:_col0 - Merge Join Operator [MERGEJOIN_889] (rows=183085709 width=227) - Conds:RS_982._col0, _col1=RS_949._col0, _col1(Inner),Output:["_col0","_col2","_col5"] - <-Map 43 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_949] - PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_947] - <-Map 51 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_982] - PartitionCols:_col0, _col1 - Select Operator [SEL_981] (rows=287989836 width=119) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_980] (rows=287989836 width=119) - predicate:(cs_item_sk BETWEEN DynamicValue(RS_144_item_i_item_sk_min) AND DynamicValue(RS_144_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_144_item_i_item_sk_bloom_filter))) - TableScan [TS_120] (rows=287989836 width=119) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_item_sk","cs_order_number","cs_ext_list_price"] - <-Reducer 38 [BROADCAST_EDGE] vectorized - BROADCAST [RS_977] - Group By Operator [GBY_975] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 35 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_938] - Group By Operator [GBY_936] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_934] (rows=518 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_930] - <-Reducer 30 [SIMPLE_EDGE] - SHUFFLE [RS_149] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_888] (rows=1608052 width=119) - Conds:RS_146._col0=RS_919._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_919] - PartitionCols:_col0 - Select Operator [SEL_916] (rows=652 width=4) - Output:["_col0"] - Filter Operator [FIL_912] (rows=652 width=8) - predicate:(d_year = 2001) - Please refer to the previous TableScan [TS_8] - <-Reducer 37 [SIMPLE_EDGE] - SHUFFLE [RS_146] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_887] (rows=4503592 width=119) - Conds:RS_996._col1=RS_933._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] - <-Map 35 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_933] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_930] - <-Map 50 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_996] - PartitionCols:_col1 - Select Operator [SEL_995] (rows=417313408 width=351) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] - Filter Operator [FIL_994] (rows=417313408 width=355) - predicate:((ss_item_sk BETWEEN DynamicValue(RS_144_item_i_item_sk_min) AND DynamicValue(RS_144_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_144_item_i_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_150_catalog_sales_cs_item_sk_min) AND DynamicValue(RS_150_catalog_sales_cs_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_150_catalog_sales_cs_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_194_item_i_item_sk_min) AND DynamicValue(RS_194_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_194_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_147_d1_d_date_sk_min) AND DynamicValue(RS_147_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_147_d1_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_111] (rows=575995635 width=355) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_promo_sk","ss_ticket_number","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] - <-Reducer 38 [BROADCAST_EDGE] vectorized - BROADCAST [RS_976] - Please refer to the previous Group By Operator [GBY_975] - <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_993] - Group By Operator [GBY_992] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 9 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_974] - Group By Operator [GBY_973] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_972] (rows=2299138 width=8) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_970] - <-Reducer 32 [BROADCAST_EDGE] vectorized - BROADCAST [RS_979] - Group By Operator [GBY_978] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 26 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_924] - Group By Operator [GBY_922] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_920] (rows=652 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_916] - <-Reducer 46 [BROADCAST_EDGE] vectorized - BROADCAST [RS_991] - Group By Operator [GBY_990] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Reducer 45 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_989] - Group By Operator [GBY_988] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_987] (rows=13257 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_985] + Select Operator [SEL_129] (rows=1630791 width=208) + Output:["_col1","_col3","_col4","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] + Merge Join Operator [MERGEJOIN_930] (rows=1630791 width=208) + Conds:RS_126._col3=RS_1004._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15"] + <-Map 38 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1004] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1001] + <-Reducer 32 [SIMPLE_EDGE] + SHUFFLE [RS_126] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_929] (rows=1608052 width=119) + Conds:RS_123._col0=RS_962._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] + <-Map 35 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_962] + PartitionCols:_col0 + Select Operator [SEL_959] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_955] (rows=652 width=8) + predicate:(d_year = 2001) + Please refer to the previous TableScan [TS_18] + <-Reducer 31 [SIMPLE_EDGE] + SHUFFLE [RS_123] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_928] (rows=4503592 width=119) + Conds:RS_1040._col1=RS_972._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] + <-Map 29 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_972] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_969] + <-Map 51 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1040] + PartitionCols:_col1 + Select Operator [SEL_1039] (rows=417313408 width=351) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] + Filter Operator [FIL_1038] (rows=417313408 width=355) + predicate:((ss_item_sk BETWEEN DynamicValue(RS_121_item_i_item_sk_min) AND DynamicValue(RS_121_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_121_item_i_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_175_catalog_sales_cs_item_sk_min) AND DynamicValue(RS_175_catalog_sales_cs_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_175_catalog_sales_cs_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_194_item_i_item_sk_min) AND DynamicValue(RS_194_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_194_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_124_d1_d_date_sk_min) AND DynamicValue(RS_124_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_124_d1_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_109] (rows=575995635 width=355) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_promo_sk","ss_ticket_number","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] + <-Reducer 34 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1020] + Please refer to the previous Group By Operator [GBY_1019] + <-Reducer 16 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1037] + Group By Operator [GBY_1036] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 13 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1018] + Group By Operator [GBY_1017] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1016] (rows=1434227 width=8) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1014] + <-Reducer 37 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1023] + Group By Operator [GBY_1022] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 35 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_967] + Group By Operator [GBY_965] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_963] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_959] + <-Reducer 46 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1035] + Group By Operator [GBY_1034] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 45 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1033] + Group By Operator [GBY_1032] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1031] (rows=13257 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1029] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query72.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query72.q.out index f27fbc9273..c17750dcdb 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query72.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query72.q.out @@ -81,217 +81,217 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage -Map 9 <- Reducer 17 (BROADCAST_EDGE), Reducer 19 (BROADCAST_EDGE), Reducer 21 (BROADCAST_EDGE) -Reducer 10 <- Map 16 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) -Reducer 11 <- Map 18 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) -Reducer 12 <- Map 20 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) -Reducer 13 <- Map 22 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Map 10 <- Reducer 18 (BROADCAST_EDGE), Reducer 20 (BROADCAST_EDGE), Reducer 22 (BROADCAST_EDGE) +Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) +Reducer 12 <- Map 19 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 13 <- Map 21 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) Reducer 14 <- Map 23 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) Reducer 15 <- Map 24 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) -Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) -Reducer 19 <- Map 18 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) -Reducer 21 <- Map 20 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Reducer 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 25 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 26 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Reducer 5 (SIMPLE_EDGE) -Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 16 <- Map 25 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 18 <- Map 17 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 20 <- Map 19 (CUSTOM_SIMPLE_EDGE) +Reducer 22 <- Map 21 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) +Reducer 7 <- Reducer 16 (ONE_TO_ONE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Map 26 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 7 vectorized - File Output Operator [FS_293] - Limit [LIM_292] (rows=100 width=312) + Reducer 4 vectorized + File Output Operator [FS_294] + Limit [LIM_293] (rows=100 width=312) Number of rows:100 - Select Operator [SEL_291] (rows=384313734 width=312) + Select Operator [SEL_292] (rows=384313734 width=312) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_290] - Group By Operator [GBY_289] (rows=384313734 width=312) + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_291] + Group By Operator [GBY_290] (rows=384313734 width=312) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Reducer 5 [SIMPLE_EDGE] + <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_64] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_63] (rows=610435044 width=312) + Group By Operator [GBY_63] (rows=1574305390 width=312) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(_col3)","count(_col4)","count()"],keys:_col0, _col1, _col2 Select Operator [SEL_61] (rows=1574305390 width=292) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_246] (rows=1574305390 width=292) - Conds:RS_58._col4, _col6=RS_288._col0, _col1(Left Outer),Output:["_col13","_col15","_col19","_col25"] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_288] + Merge Join Operator [MERGEJOIN_247] (rows=1574305390 width=292) + Conds:RS_249._col0, _col1=RS_59._col4, _col6(Right Outer),Output:["_col15","_col17","_col21","_col27"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_249] PartitionCols:_col0, _col1 - Select Operator [SEL_287] (rows=28798881 width=8) + Select Operator [SEL_248] (rows=28798881 width=8) Output:["_col0","_col1"] - TableScan [TS_56] (rows=28798881 width=8) + TableScan [TS_0] (rows=28798881 width=8) default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_58] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_59] PartitionCols:_col4, _col6 - Select Operator [SEL_55] (rows=610435044 width=300) + Select Operator [SEL_57] (rows=610435044 width=300) Output:["_col4","_col6","_col13","_col15","_col19","_col25"] - Merge Join Operator [MERGEJOIN_245] (rows=610435044 width=300) - Conds:RS_52._col0, _col19=RS_286._col0, _col1(Inner),Output:["_col5","_col9","_col14","_col16","_col19","_col23"] - <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_286] + Merge Join Operator [MERGEJOIN_246] (rows=610435044 width=300) + Conds:RS_54._col0, _col19=RS_289._col0, _col1(Inner),Output:["_col5","_col7","_col14","_col16","_col19","_col23"] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_289] PartitionCols:_col0, _col1 - Select Operator [SEL_285] (rows=73049 width=8) + Select Operator [SEL_288] (rows=73049 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_284] (rows=73049 width=8) + Filter Operator [FIL_287] (rows=73049 width=8) predicate:d_week_seq is not null - TableScan [TS_42] (rows=73049 width=8) + TableScan [TS_44] (rows=73049 width=8) default@date_dim,d2,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_week_seq"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_52] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_54] PartitionCols:_col0, _col19 - Filter Operator [FIL_51] (rows=545947820 width=311) + Filter Operator [FIL_53] (rows=545947820 width=311) predicate:(_col3 < _col17) - Merge Join Operator [MERGEJOIN_244] (rows=1637843460 width=311) - Conds:RS_48._col1=RS_49._col8(Inner),Output:["_col0","_col3","_col5","_col9","_col14","_col16","_col17","_col19","_col23"] - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_49] + Merge Join Operator [MERGEJOIN_245] (rows=1637843460 width=311) + Conds:RS_50._col1=RS_51._col8(Inner),Output:["_col0","_col3","_col5","_col7","_col14","_col16","_col17","_col19","_col23"] + <-Reducer 16 [ONE_TO_ONE_EDGE] + FORWARD [RS_51] PartitionCols:_col8 - Select Operator [SEL_41] (rows=2726340 width=219) - Output:["_col3","_col8","_col10","_col11","_col13","_col17"] - Filter Operator [FIL_40] (rows=2726340 width=219) - predicate:(_col17 > _col10) - Merge Join Operator [MERGEJOIN_243] (rows=8179022 width=219) - Conds:RS_37._col1=RS_283._col0(Inner),Output:["_col4","_col6","_col7","_col9","_col10","_col13","_col15","_col17"] - <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_283] - PartitionCols:_col0 - Select Operator [SEL_282] (rows=73049 width=12) - Output:["_col0","_col1"] - TableScan [TS_20] (rows=73049 width=98) - default@date_dim,d3,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] - <-Reducer 14 [SIMPLE_EDGE] - SHUFFLE [RS_37] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_242] (rows=8179022 width=214) - Conds:RS_34._col4=RS_281._col0(Inner),Output:["_col1","_col4","_col6","_col7","_col9","_col10","_col13","_col15"] - <-Map 23 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_281] - PartitionCols:_col0 - Select Operator [SEL_280] (rows=462000 width=188) - Output:["_col0","_col1"] - TableScan [TS_18] (rows=462000 width=188) - default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_desc"] - <-Reducer 13 [SIMPLE_EDGE] - SHUFFLE [RS_34] - PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_241] (rows=8179022 width=30) - Conds:RS_31._col5=RS_279._col0(Left Outer),Output:["_col1","_col4","_col6","_col7","_col9","_col10","_col13"] - <-Map 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_279] + Select Operator [SEL_43] (rows=2726340 width=203) + Output:["_col1","_col8","_col10","_col11","_col13","_col17"] + Merge Join Operator [MERGEJOIN_244] (rows=2726340 width=203) + Conds:RS_40._col4=RS_286._col0(Inner),Output:["_col4","_col6","_col7","_col9","_col15","_col17"] + <-Map 25 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_286] + PartitionCols:_col0 + Select Operator [SEL_285] (rows=462000 width=188) + Output:["_col0","_col1"] + TableScan [TS_22] (rows=462000 width=188) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_desc"] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_40] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_243] (rows=2726340 width=19) + Conds:RS_37._col5=RS_284._col0(Left Outer),Output:["_col4","_col6","_col7","_col9","_col15"] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_284] + PartitionCols:_col0 + Select Operator [SEL_283] (rows=2300 width=4) + Output:["_col0"] + TableScan [TS_20] (rows=2300 width=4) + default@promotion,promotion,Tbl:COMPLETE,Col:COMPLETE,Output:["p_promo_sk"] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_37] + PartitionCols:_col5 + Filter Operator [FIL_36] (rows=2726340 width=34) + predicate:(_col14 > _col10) + Merge Join Operator [MERGEJOIN_242] (rows=8179022 width=34) + Conds:RS_33._col1=RS_282._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col9","_col10","_col14"] + <-Map 23 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_282] PartitionCols:_col0 - Select Operator [SEL_278] (rows=2300 width=4) - Output:["_col0"] - TableScan [TS_16] (rows=2300 width=4) - default@promotion,promotion,Tbl:COMPLETE,Col:COMPLETE,Output:["p_promo_sk"] - <-Reducer 12 [SIMPLE_EDGE] - SHUFFLE [RS_31] - PartitionCols:_col5 - Merge Join Operator [MERGEJOIN_240] (rows=8179022 width=29) - Conds:RS_28._col3=RS_269._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col9","_col10"] - <-Map 20 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_269] + Select Operator [SEL_281] (rows=73049 width=12) + Output:["_col0","_col1"] + TableScan [TS_18] (rows=73049 width=98) + default@date_dim,d3,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_33] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_241] (rows=8179022 width=29) + Conds:RS_30._col3=RS_272._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col9","_col10"] + <-Map 21 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_272] PartitionCols:_col0 - Select Operator [SEL_268] (rows=1440 width=4) + Select Operator [SEL_271] (rows=1440 width=4) Output:["_col0"] - Filter Operator [FIL_267] (rows=1440 width=96) + Filter Operator [FIL_270] (rows=1440 width=96) predicate:(hd_buy_potential = '1001-5000') - TableScan [TS_13] (rows=7200 width=96) + TableScan [TS_15] (rows=7200 width=96) default@household_demographics,household_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_buy_potential"] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_28] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_30] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_239] (rows=40895108 width=35) - Conds:RS_25._col2=RS_261._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col9","_col10"] - <-Map 18 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_261] + Merge Join Operator [MERGEJOIN_240] (rows=40895108 width=35) + Conds:RS_27._col2=RS_264._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col9","_col10"] + <-Map 19 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_264] PartitionCols:_col0 - Select Operator [SEL_260] (rows=265971 width=4) + Select Operator [SEL_263] (rows=265971 width=4) Output:["_col0"] - Filter Operator [FIL_259] (rows=265971 width=89) + Filter Operator [FIL_262] (rows=265971 width=89) predicate:(cd_marital_status = 'M') - TableScan [TS_10] (rows=1861800 width=89) + TableScan [TS_12] (rows=1861800 width=89) default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status"] - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_25] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_27] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_238] (rows=100076475 width=39) - Conds:RS_277._col0=RS_253._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10"] - <-Map 16 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_253] + Merge Join Operator [MERGEJOIN_239] (rows=100076475 width=39) + Conds:RS_280._col0=RS_256._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10"] + <-Map 17 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_256] PartitionCols:_col0 - Select Operator [SEL_252] (rows=652 width=16) + Select Operator [SEL_255] (rows=652 width=16) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_251] (rows=652 width=106) + Filter Operator [FIL_254] (rows=652 width=106) predicate:((d_year = 2001) and d_week_seq is not null) - TableScan [TS_7] (rows=73049 width=106) + TableScan [TS_9] (rows=73049 width=106) default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date","d_week_seq","d_year"] - <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_277] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_280] PartitionCols:_col0 - Select Operator [SEL_276] (rows=282274763 width=31) + Select Operator [SEL_279] (rows=282274763 width=31) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_275] (rows=282274763 width=31) - predicate:((cs_bill_cdemo_sk BETWEEN DynamicValue(RS_26_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_26_customer_demographics_cd_demo_sk_max) and in_bloom_filter(cs_bill_cdemo_sk, DynamicValue(RS_26_customer_demographics_cd_demo_sk_bloom_filter))) and (cs_bill_hdemo_sk BETWEEN DynamicValue(RS_29_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_29_household_demographics_hd_demo_sk_max) and in_bloom_filter(cs_bill_hdemo_sk, DynamicValue(RS_29_household_demographics_hd_demo_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_23_d1_d_date_sk_min) AND DynamicValue(RS_23_d1_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_23_d1_d_date_sk_bloom_filter))) and cs_bill_cdemo_sk is not null and cs_bill_hdemo_sk is not null and cs_ship_date_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_4] (rows=287989836 width=31) + Filter Operator [FIL_278] (rows=282274763 width=31) + predicate:((cs_bill_cdemo_sk BETWEEN DynamicValue(RS_28_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_28_customer_demographics_cd_demo_sk_max) and in_bloom_filter(cs_bill_cdemo_sk, DynamicValue(RS_28_customer_demographics_cd_demo_sk_bloom_filter))) and (cs_bill_hdemo_sk BETWEEN DynamicValue(RS_31_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_31_household_demographics_hd_demo_sk_max) and in_bloom_filter(cs_bill_hdemo_sk, DynamicValue(RS_31_household_demographics_hd_demo_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_25_d1_d_date_sk_min) AND DynamicValue(RS_25_d1_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_25_d1_d_date_sk_bloom_filter))) and cs_bill_cdemo_sk is not null and cs_bill_hdemo_sk is not null and cs_ship_date_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_6] (rows=287989836 width=31) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_ship_date_sk","cs_bill_cdemo_sk","cs_bill_hdemo_sk","cs_item_sk","cs_promo_sk","cs_order_number","cs_quantity"] - <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_258] - Group By Operator [GBY_257] (rows=1 width=12) + <-Reducer 18 [BROADCAST_EDGE] vectorized + BROADCAST [RS_261] + Group By Operator [GBY_260] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_256] - Group By Operator [GBY_255] (rows=1 width=12) + <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_259] + Group By Operator [GBY_258] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_254] (rows=652 width=4) + Select Operator [SEL_257] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_252] - <-Reducer 19 [BROADCAST_EDGE] vectorized - BROADCAST [RS_266] - Group By Operator [GBY_265] (rows=1 width=12) + Please refer to the previous Select Operator [SEL_255] + <-Reducer 20 [BROADCAST_EDGE] vectorized + BROADCAST [RS_269] + Group By Operator [GBY_268] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 18 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_264] - Group By Operator [GBY_263] (rows=1 width=12) + <-Map 19 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_267] + Group By Operator [GBY_266] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_262] (rows=265971 width=4) + Select Operator [SEL_265] (rows=265971 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_260] - <-Reducer 21 [BROADCAST_EDGE] vectorized - BROADCAST [RS_274] - Group By Operator [GBY_273] (rows=1 width=12) + Please refer to the previous Select Operator [SEL_263] + <-Reducer 22 [BROADCAST_EDGE] vectorized + BROADCAST [RS_277] + Group By Operator [GBY_276] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_272] - Group By Operator [GBY_271] (rows=1 width=12) + <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_275] + Group By Operator [GBY_274] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_270] (rows=1440 width=4) + Select Operator [SEL_273] (rows=1440 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_268] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_48] + Please refer to the previous Select Operator [SEL_271] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_50] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_237] (rows=37584000 width=111) - Conds:RS_248._col2=RS_250._col0(Inner),Output:["_col0","_col1","_col3","_col5"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_248] + Merge Join Operator [MERGEJOIN_238] (rows=37584000 width=111) + Conds:RS_251._col2=RS_253._col0(Inner),Output:["_col0","_col1","_col3","_col5"] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_251] PartitionCols:_col2 - Select Operator [SEL_247] (rows=37584000 width=15) + Select Operator [SEL_250] (rows=37584000 width=15) Output:["_col0","_col1","_col2","_col3"] - TableScan [TS_0] (rows=37584000 width=15) + TableScan [TS_2] (rows=37584000 width=15) default@inventory,inventory,Tbl:COMPLETE,Col:COMPLETE,Output:["inv_date_sk","inv_item_sk","inv_warehouse_sk","inv_quantity_on_hand"] - <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_250] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_253] PartitionCols:_col0 - Select Operator [SEL_249] (rows=27 width=104) + Select Operator [SEL_252] (rows=27 width=104) Output:["_col0","_col1"] - TableScan [TS_2] (rows=27 width=104) + TableScan [TS_4] (rows=27 width=104) default@warehouse,warehouse,Tbl:COMPLETE,Col:COMPLETE,Output:["w_warehouse_sk","w_warehouse_name"] diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query84.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query84.q.out index 8feac3e17b..2734835b64 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query84.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query84.q.out @@ -56,83 +56,83 @@ Plan optimized by CBO. Vertex dependency in root stage Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 8 (SIMPLE_EDGE) -Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 7 <- Map 6 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) -Reducer 8 <- Reducer 11 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 4 <- Reducer 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 8 <- Map 7 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 4 vectorized + Reducer 5 vectorized File Output Operator [FS_139] Limit [LIM_138] (rows=100 width=384) Number of rows:100 - Select Operator [SEL_137] (rows=255285 width=384) + Select Operator [SEL_137] (rows=255280 width=384) Output:["_col0","_col1"] - <-Reducer 3 [SIMPLE_EDGE] + <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_35] - Select Operator [SEL_34] (rows=255285 width=384) + Select Operator [SEL_34] (rows=255280 width=384) Output:["_col1","_col2"] - Merge Join Operator [MERGEJOIN_119] (rows=255285 width=284) - Conds:RS_31._col1=RS_32._col1(Inner),Output:["_col2","_col6"] - <-Reducer 2 [ONE_TO_ONE_EDGE] - FORWARD [RS_31] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_115] (rows=56363634 width=4) - Conds:RS_122._col0=RS_124._col0(Inner),Output:["_col1"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_122] - PartitionCols:_col0 - Select Operator [SEL_121] (rows=55577698 width=3) - Output:["_col0"] - Filter Operator [FIL_120] (rows=55577698 width=3) - predicate:sr_cdemo_sk is not null - TableScan [TS_0] (rows=57591150 width=3) - default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_cdemo_sk"] - <-Map 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_124] - PartitionCols:_col0 - Select Operator [SEL_123] (rows=1861800 width=4) - Output:["_col0"] - TableScan [TS_3] (rows=1861800 width=4) - default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk"] - <-Reducer 8 [SIMPLE_EDGE] + Merge Join Operator [MERGEJOIN_119] (rows=255280 width=284) + Conds:RS_31._col4=RS_32._col0(Inner),Output:["_col2","_col6"] + <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_32] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_118] (rows=8315 width=284) - Conds:RS_24._col2=RS_25._col0(Inner),Output:["_col0","_col1","_col4"] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_25] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_117] (rows=721 width=4) + Conds:RS_133._col1=RS_136._col0(Inner),Output:["_col0"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_133] + PartitionCols:_col1 + Select Operator [SEL_132] (rows=7200 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_131] (rows=7200 width=8) + predicate:hd_income_band_sk is not null + TableScan [TS_15] (rows=7200 width=8) + default@household_demographics,household_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_income_band_sk"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_136] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_117] (rows=721 width=4) - Conds:RS_133._col1=RS_136._col0(Inner),Output:["_col0"] - <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_133] - PartitionCols:_col1 - Select Operator [SEL_132] (rows=7200 width=8) - Output:["_col0","_col1"] - Filter Operator [FIL_131] (rows=7200 width=8) - predicate:hd_income_band_sk is not null - TableScan [TS_11] (rows=7200 width=8) - default@household_demographics,household_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_income_band_sk"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_136] + Select Operator [SEL_135] (rows=2 width=4) + Output:["_col0"] + Filter Operator [FIL_134] (rows=2 width=12) + predicate:((ib_lower_bound >= 32287) and (ib_upper_bound <= 82287)) + TableScan [TS_18] (rows=20 width=12) + default@income_band,income_band,Tbl:COMPLETE,Col:COMPLETE,Output:["ib_income_band_sk","ib_lower_bound","ib_upper_bound"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_31] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_118] (rows=2552796 width=287) + Conds:RS_28._col1=RS_29._col1(Inner),Output:["_col2","_col4","_col6"] + <-Reducer 2 [ONE_TO_ONE_EDGE] + FORWARD [RS_28] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_115] (rows=56363634 width=4) + Conds:RS_122._col0=RS_124._col0(Inner),Output:["_col1"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_122] PartitionCols:_col0 - Select Operator [SEL_135] (rows=2 width=4) + Select Operator [SEL_121] (rows=55577698 width=3) Output:["_col0"] - Filter Operator [FIL_134] (rows=2 width=12) - predicate:((ib_lower_bound >= 32287) and (ib_upper_bound <= 82287)) - TableScan [TS_14] (rows=20 width=12) - default@income_band,income_band,Tbl:COMPLETE,Col:COMPLETE,Output:["ib_income_band_sk","ib_lower_bound","ib_upper_bound"] - <-Reducer 7 [SIMPLE_EDGE] - SHUFFLE [RS_24] - PartitionCols:_col2 + Filter Operator [FIL_120] (rows=55577698 width=3) + predicate:sr_cdemo_sk is not null + TableScan [TS_0] (rows=57591150 width=3) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_cdemo_sk"] + <-Map 6 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_124] + PartitionCols:_col0 + Select Operator [SEL_123] (rows=1861800 width=4) + Output:["_col0"] + TableScan [TS_3] (rows=1861800 width=4) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk"] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_29] + PartitionCols:_col1 Merge Join Operator [MERGEJOIN_116] (rows=83148 width=284) Conds:RS_127._col3=RS_130._col0(Inner),Output:["_col0","_col1","_col2","_col4"] - <-Map 6 [SIMPLE_EDGE] vectorized + <-Map 7 [SIMPLE_EDGE] vectorized SHUFFLE [RS_127] PartitionCols:_col3 Select Operator [SEL_126] (rows=74500295 width=295)