diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java index 9aa30129b6..322e925843 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelOptUtil.java @@ -17,12 +17,14 @@ */ package org.apache.hadoop.hive.ql.optimizer.calcite; +import com.google.common.collect.HashMultimap; import com.google.common.collect.Multimap; import com.google.common.collect.Sets; import java.util.AbstractList; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; +import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; @@ -62,6 +64,7 @@ import org.apache.calcite.tools.RelBuilder; import org.apache.calcite.util.ImmutableBitSet; import org.apache.calcite.util.Pair; +import org.apache.commons.lang3.tuple.Triple; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; @@ -536,7 +539,183 @@ public static boolean isRowFilteringPlan(final RelMetadataQuery mq, RelNode oper return false; } - public static Pair> isRewritablePKFKJoin(RelBuilder builder, Join join, + /** + * Returns a triple where first value represents whether we could extract a FK-PK join + * or not, the second value is a pair with the column from left and right input that + * are used for the FK-PK join, and the third value are the predicates that are not + * part of the FK-PK condition. Currently we can only extract one FK-PK join. + */ + public static Triple, List> extractPKFKJoin( + Join join, List joinFilters, boolean leftInputPotentialFK, RelMetadataQuery mq) { + final List residualPreds = new ArrayList<>(); + final JoinRelType joinType = join.getJoinType(); + final RelNode fkInput = leftInputPotentialFK ? 
join.getLeft() : join.getRight(); + final Triple, List> cannotExtract = + Triple.of(false, null, null); + + if (joinType != JoinRelType.INNER) { + // If it is not an inner, we transform it as the metadata + // providers for expressions do not pull information through + // outer join (as it would not be correct) + join = join.copy(join.getTraitSet(), join.getCluster().getRexBuilder().makeLiteral(true), + join.getLeft(), join.getRight(), JoinRelType.INNER, false); + } + + // 1) Gather all tables from the FK side and the table from the + // non-FK side + final Set leftTables = mq.getTableReferences(join.getLeft()); + final Set rightTables = + Sets.difference(mq.getTableReferences(join), mq.getTableReferences(join.getLeft())); + final Set fkTables = join.getLeft() == fkInput ? leftTables : rightTables; + final Set nonFkTables = join.getLeft() == fkInput ? rightTables : leftTables; + if (nonFkTables.size() != 1) { + // More than one table in PK side, we bail out + return cannotExtract; + } + + // 2) Check whether there is a FK relationship + Set candidatePredicates = new HashSet<>(); + EquivalenceClasses ec = new EquivalenceClasses(); + for (RexNode conj : joinFilters) { + if (!conj.isA(SqlKind.EQUALS)) { + // Not an equality, continue + residualPreds.add(conj); + continue; + } + RexCall equiCond = (RexCall) conj; + RexNode eqOp1 = equiCond.getOperands().get(0); + if (!RexUtil.isReferenceOrAccess(eqOp1, true)) { + // Ignore + residualPreds.add(conj); + continue; + } + Set eqOp1ExprsLineage = mq.getExpressionLineage(join, eqOp1); + if (eqOp1ExprsLineage == null) { + // Cannot be mapped, continue + residualPreds.add(conj); + continue; + } + RexNode eqOp2 = equiCond.getOperands().get(1); + if (!RexUtil.isReferenceOrAccess(eqOp2, true)) { + // Ignore + residualPreds.add(conj); + continue; + } + Set eqOp2ExprsLineage = mq.getExpressionLineage(join, eqOp2); + if (eqOp2ExprsLineage == null) { + // Cannot be mapped, continue + residualPreds.add(conj); + continue; + } + List 
eqOp2ExprsFiltered = null; + for (RexNode eqOpExprLineage1 : eqOp1ExprsLineage) { + RexTableInputRef inputRef1 = extractTableInputRef(eqOpExprLineage1); + if (inputRef1 == null) { + // This condition could not be mapped into an input reference + continue; + } + if (eqOp2ExprsFiltered == null) { + // First iteration + eqOp2ExprsFiltered = new ArrayList<>(); + for (RexNode eqOpExprLineage2 : eqOp2ExprsLineage) { + RexTableInputRef inputRef2 = extractTableInputRef(eqOpExprLineage2); + if (inputRef2 == null) { + // Skip it, as this condition could not be mapped into an input reference + continue; + } + // Add to list of expressions for follow-up iterations + eqOp2ExprsFiltered.add(inputRef2); + // Add to equivalence classes and backwards mapping + ec.addEquivalence(inputRef1, inputRef2, equiCond); + candidatePredicates.add(equiCond); + } + } else { + // Rest of iterations, only adding, no checking + for (RexTableInputRef inputRef2 : eqOp2ExprsFiltered) { + ec.addEquivalence(inputRef1, inputRef2, equiCond); + } + } + } + if (!candidatePredicates.contains(conj)) { + // We add it to residual already + residualPreds.add(conj); + } + } + if (ec.getEquivalenceClassesMap().isEmpty()) { + // This may be a cartesian product, we bail out + return cannotExtract; + } + + // 4) For each table, check whether there is a match on the non-FK side. 
+ // If there is and it is the only condition, we are ready to transform + final RelTableRef nonFkTable = nonFkTables.iterator().next(); + final List nonFkTableQName = nonFkTable.getQualifiedName(); + for (RelTableRef tRef : fkTables) { + List constraints = tRef.getTable().getReferentialConstraints(); + for (RelReferentialConstraint constraint : constraints) { + if (constraint.getTargetQualifiedName().equals(nonFkTableQName)) { + EquivalenceClasses ecT = EquivalenceClasses.copy(ec); + Set removedOriginalPredicates = new HashSet<>(); + ImmutableBitSet.Builder lBitSet = ImmutableBitSet.builder(); + ImmutableBitSet.Builder rBitSet = ImmutableBitSet.builder(); + boolean allContained = true; + for (int pos = 0; pos < constraint.getNumColumns(); pos++) { + int foreignKeyPos = constraint.getColumnPairs().get(pos).source; + RelDataType foreignKeyColumnType = + tRef.getTable().getRowType().getFieldList().get(foreignKeyPos).getType(); + RexTableInputRef foreignKeyColumnRef = + RexTableInputRef.of(tRef, foreignKeyPos, foreignKeyColumnType); + int uniqueKeyPos = constraint.getColumnPairs().get(pos).target; + RexTableInputRef uniqueKeyColumnRef = RexTableInputRef.of(nonFkTable, uniqueKeyPos, + nonFkTable.getTable().getRowType().getFieldList().get(uniqueKeyPos).getType()); + if (ecT.getEquivalenceClassesMap().containsKey(uniqueKeyColumnRef) && + ecT.getEquivalenceClassesMap().get(uniqueKeyColumnRef).contains(foreignKeyColumnRef)) { + // Remove this condition from eq classes as we have checked that it is present + // in the join condition. 
In turn, populate the columns that are referenced + // from the join inputs + for (RexCall originalPred : ecT.removeEquivalence(uniqueKeyColumnRef, foreignKeyColumnRef)) { + ImmutableBitSet leftCols = RelOptUtil.InputFinder.bits(originalPred.getOperands().get(0)); + ImmutableBitSet rightCols = RelOptUtil.InputFinder.bits(originalPred.getOperands().get(1)); + // Get length and flip column references if join condition specified in + // reverse order to join sources + int nFieldsLeft = join.getLeft().getRowType().getFieldList().size(); + int nFieldsRight = join.getRight().getRowType().getFieldList().size(); + int nSysFields = join.getSystemFieldList().size(); + ImmutableBitSet rightFieldsBitSet = ImmutableBitSet.range(nSysFields + nFieldsLeft, + nSysFields + nFieldsLeft + nFieldsRight); + if (rightFieldsBitSet.contains(leftCols)) { + ImmutableBitSet t = leftCols; + leftCols = rightCols; + rightCols = t; + } + lBitSet.set(leftCols.nextSetBit(0) - nSysFields); + rBitSet.set(rightCols.nextSetBit(0) - (nSysFields + nFieldsLeft)); + removedOriginalPredicates.add(originalPred); + } + } else { + // No relationship, we cannot do anything + allContained = false; + break; + } + } + if (allContained) { + // This is a PK-FK, reassign equivalence classes and remove conditions + // TODO: Support inference of multiple PK-FK relationships + + // 4.1) Add to residual whatever is remaining + candidatePredicates.removeAll(removedOriginalPredicates); + residualPreds.addAll(candidatePredicates); + // 4.2) Return result + return Triple.of(true, Pair.of(lBitSet.build(), rBitSet.build()), residualPreds); + } + } + } + } + + return cannotExtract; + } + + public static Pair> isRewritablePKFKJoin(Join join, boolean leftInputPotentialFK, RelMetadataQuery mq) { final JoinRelType joinType = join.getJoinType(); final RexNode cond = join.getCondition(); @@ -548,10 +727,9 @@ public static boolean isRowFilteringPlan(final RelMetadataQuery mq, RelNode oper // If it is not an inner, we transform it as 
the metadata // providers for expressions do not pull information through // outer join (as it would not be correct) - join = (Join) builder - .push(join.getLeft()).push(join.getRight()) - .join(JoinRelType.INNER, cond) - .build(); + join = join.copy(join.getTraitSet(), cond, + join.getLeft(), join.getRight(), JoinRelType.INNER, + false); } // 1) Check whether there is any filtering condition on the @@ -602,13 +780,13 @@ public static boolean isRowFilteringPlan(final RelMetadataQuery mq, RelNode oper // Add to list of expressions for follow-up iterations eqOp2ExprsFiltered.add(inputRef2); // Add to equivalence classes and backwards mapping - ec.addEquivalenceClass(inputRef1, inputRef2); + ec.addEquivalence(inputRef1, inputRef2); refToRex.put(inputRef2, eqOp2); } } else { // Rest of iterations, only adding, no checking for (RexTableInputRef inputRef2 : eqOp2ExprsFiltered) { - ec.addEquivalenceClass(inputRef1, inputRef2); + ec.addEquivalence(inputRef1, inputRef2); } } } @@ -665,14 +843,7 @@ public static boolean isRowFilteringPlan(final RelMetadataQuery mq, RelNode oper } // Remove this condition from eq classes as we have checked that it is present // in the join condition - ecT.getEquivalenceClassesMap().get(uniqueKeyColumnRef).remove(foreignKeyColumnRef); - if (ecT.getEquivalenceClassesMap().get(uniqueKeyColumnRef).size() == 1) { // self - ecT.getEquivalenceClassesMap().remove(uniqueKeyColumnRef); - } - ecT.getEquivalenceClassesMap().get(foreignKeyColumnRef).remove(uniqueKeyColumnRef); - if (ecT.getEquivalenceClassesMap().get(foreignKeyColumnRef).size() == 1) { // self - ecT.getEquivalenceClassesMap().remove(foreignKeyColumnRef); - } + ecT.removeEquivalence(uniqueKeyColumnRef, foreignKeyColumnRef); } else { // No relationship, we cannot do anything allContained = false; @@ -711,13 +882,23 @@ private static RexTableInputRef extractTableInputRef(RexNode node) { */ private static class EquivalenceClasses { + // Contains the node to equivalence class nodes private 
final Map> nodeToEquivalenceClass; + // Contains the pair of equivalences to original expression that they originate from + private final Multimap, RexCall> equivalenceToOriginalNode; protected EquivalenceClasses() { nodeToEquivalenceClass = new HashMap<>(); + equivalenceToOriginalNode = HashMultimap.create(); } - protected void addEquivalenceClass(RexTableInputRef p1, RexTableInputRef p2) { + protected void addEquivalence(RexTableInputRef p1, RexTableInputRef p2, RexCall originalCond) { + addEquivalence(p1, p2); + equivalenceToOriginalNode.put(Pair.of(p1, p2), originalCond); + equivalenceToOriginalNode.put(Pair.of(p2, p1), originalCond); + } + + protected void addEquivalence(RexTableInputRef p1, RexTableInputRef p2) { Set c1 = nodeToEquivalenceClass.get(p1); Set c2 = nodeToEquivalenceClass.get(p2); if (c1 != null && c2 != null) { @@ -754,11 +935,30 @@ protected void addEquivalenceClass(RexTableInputRef p1, RexTableInputRef p2) { return nodeToEquivalenceClass; } + // Returns the original nodes that the equivalences were generated from + protected Set removeEquivalence(RexTableInputRef p1, RexTableInputRef p2) { + nodeToEquivalenceClass.get(p1).remove(p2); + if (nodeToEquivalenceClass.get(p1).size() == 1) { // self + nodeToEquivalenceClass.remove(p1); + } + nodeToEquivalenceClass.get(p2).remove(p1); + if (nodeToEquivalenceClass.get(p2).size() == 1) { // self + nodeToEquivalenceClass.remove(p2); + } + Set originalNodes = new HashSet<>(); + originalNodes.addAll(equivalenceToOriginalNode.removeAll(Pair.of(p1, p2))); + originalNodes.addAll(equivalenceToOriginalNode.removeAll(Pair.of(p2, p1))); + return originalNodes; + } + protected static EquivalenceClasses copy(EquivalenceClasses ec) { final EquivalenceClasses newEc = new EquivalenceClasses(); for (Entry> e : ec.nodeToEquivalenceClass.entrySet()) { newEc.nodeToEquivalenceClass.put(e.getKey(), Sets.newLinkedHashSet(e.getValue())); } + for (Entry, Collection> e : ec.equivalenceToOriginalNode.asMap().entrySet()) { + 
newEc.equivalenceToOriginalNode.putAll(e.getKey(), e.getValue()); + } return newEc; } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java index 534a5c9531..802d318fcd 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinConstraintsRule.java @@ -220,7 +220,7 @@ public void onMatch(RelOptRuleCall call) { } // 2) Check whether this join can be rewritten or removed - Pair> r = HiveRelOptUtil.isRewritablePKFKJoin(call.builder(), + Pair> r = HiveRelOptUtil.isRewritablePKFKJoin( join, leftInput == fkInput, call.getMetadataQuery()); // 3) If it is the only condition, we can trigger the rewriting diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java index 576ed34bf3..f1f9b670cc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java @@ -34,6 +34,7 @@ import org.apache.calcite.rel.core.TableScan; import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; import org.apache.calcite.rel.metadata.RelMdRowCount; +import org.apache.calcite.rel.metadata.RelMdUtil; import org.apache.calcite.rel.metadata.RelMetadataProvider; import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rex.RexBuilder; @@ -46,6 +47,8 @@ import org.apache.calcite.util.BuiltInMethod; import org.apache.calcite.util.ImmutableBitSet; import org.apache.calcite.util.Pair; +import org.apache.commons.lang3.tuple.Triple; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import 
org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; @@ -63,15 +66,33 @@ protected HiveRelMdRowCount() { } public Double getRowCount(Join join, RelMetadataQuery mq) { + // Try to infer from constraints first + Pair constraintBasedResult = + constraintsBasedAnalyzeJoinForPKFK(join, mq); + if (constraintBasedResult != null) { + // We succeeded, we calculate the selectivity based on the inferred information + // and any residual predicate + double joinSelectivity = Math.min(1.0, + constraintBasedResult.left.pkInfo.selectivity * constraintBasedResult.left.ndvScalingFactor); + double residualSelectivity = RelMdUtil.guessSelectivity(constraintBasedResult.right); + double rowCount = constraintBasedResult.left.fkInfo.rowCount * joinSelectivity * residualSelectivity; + if (LOG.isDebugEnabled()) { + LOG.debug("Identified Primary - Foreign Key relation from constraints:\n {} {} Row count for join: {}\n", + RelOptUtil.toString(join), constraintBasedResult.left, rowCount); + } + return rowCount; + } + // Otherwise, try to infer from stats PKFKRelationInfo pkfk = analyzeJoinForPKFK(join, mq); if (pkfk != null) { - double selectivity = (pkfk.pkInfo.selectivity * pkfk.ndvScalingFactor); + double selectivity = pkfk.pkInfo.selectivity * pkfk.ndvScalingFactor; selectivity = Math.min(1.0, selectivity); if (LOG.isDebugEnabled()) { LOG.debug("Identified Primary - Foreign Key relation: {} {}",RelOptUtil.toString(join), pkfk); } return pkfk.fkInfo.rowCount * selectivity; } + // If we cannot infer anything, then we just go to join.estimateRowCount(mq). 
// Do not call mq.getRowCount(join), will trigger CyclicMetadataException return join.estimateRowCount(mq); } @@ -80,7 +101,7 @@ public Double getRowCount(Join join, RelMetadataQuery mq) { public Double getRowCount(SemiJoin rel, RelMetadataQuery mq) { PKFKRelationInfo pkfk = analyzeJoinForPKFK(rel, mq); if (pkfk != null) { - double selectivity = (pkfk.pkInfo.selectivity * pkfk.ndvScalingFactor); + double selectivity = pkfk.pkInfo.selectivity * pkfk.ndvScalingFactor; selectivity = Math.min(1.0, selectivity); if (LOG.isDebugEnabled()) { LOG.debug("Identified Primary - Foreign Key relation: {} {}", RelOptUtil.toString(rel), pkfk); @@ -217,10 +238,10 @@ public static PKFKRelationInfo analyzeJoinForPKFK(Join joinRel, RelMetadataQuery int rightColIdx = joinCols.right; RexBuilder rexBuilder = joinRel.getCluster().getRexBuilder(); - RexNode leftPred = RexUtil - .composeConjunction(rexBuilder, leftFilters, true); - RexNode rightPred = RexUtil.composeConjunction(rexBuilder, rightFilters, - true); + RexNode leftPred = RexUtil.composeConjunction( + rexBuilder, leftFilters, true); + RexNode rightPred = RexUtil.composeConjunction( + rexBuilder, rightFilters, true); ImmutableBitSet lBitSet = ImmutableBitSet.of(leftColIdx); ImmutableBitSet rBitSet = ImmutableBitSet.of(rightColIdx); @@ -228,11 +249,10 @@ public static PKFKRelationInfo analyzeJoinForPKFK(Join joinRel, RelMetadataQuery * If the form is Dim loj F or Fact roj Dim or Dim semij Fact then return * null. 
*/ - boolean leftIsKey = (joinRel.getJoinType() == JoinRelType.INNER || joinRel - .getJoinType() == JoinRelType.RIGHT) - && !(joinRel instanceof SemiJoin) && isKey(lBitSet, left, mq); - boolean rightIsKey = (joinRel.getJoinType() == JoinRelType.INNER || joinRel - .getJoinType() == JoinRelType.LEFT) && isKey(rBitSet, right, mq); + boolean leftIsKey = (joinRel.getJoinType() == JoinRelType.INNER || joinRel.getJoinType() == JoinRelType.RIGHT) + && isKey(lBitSet, left, mq); + boolean rightIsKey = (joinRel.getJoinType() == JoinRelType.INNER || joinRel.getJoinType() == JoinRelType.LEFT) + && isKey(rBitSet, right, mq); if (!leftIsKey && !rightIsKey) { return null; @@ -247,41 +267,37 @@ public static PKFKRelationInfo analyzeJoinForPKFK(Join joinRel, RelMetadataQuery } } - int pkSide = leftIsKey ? 0 : rightIsKey ? 1 : -1; - - boolean isPKSideSimpleTree = pkSide != -1 ? - IsSimpleTreeOnJoinKey.check( - pkSide == 0 ? left : right, - pkSide == 0 ? leftColIdx : rightColIdx, mq) : false; - - double leftNDV = isPKSideSimpleTree ? mq.getDistinctRowCount(left, lBitSet, leftPred) : -1; - double rightNDV = isPKSideSimpleTree ? mq.getDistinctRowCount(right, rBitSet, rightPred) : -1; - - /* - * If the ndv of the PK - FK side don't match, and the PK side is a filter - * on the Key column then scale the NDV on the FK side. - * - * As described by Peter Boncz: http://databasearchitects.blogspot.com/ - * in such cases we can be off by a large margin in the Join cardinality - * estimate. The e.g. he provides is on the join of StoreSales and DateDim - * on the TPCDS dataset. Since the DateDim is populated for 20 years into - * the future, while the StoreSales only has 5 years worth of data, there - * are 40 times fewer distinct dates in StoreSales. - * - * In general it is hard to infer the range for the foreign key on an - * arbitrary expression. For e.g. 
the NDV for DayofWeek is the same - * irrespective of NDV on the number of unique days, whereas the - * NDV of Quarters has the same ratio as the NDV on the keys. - * - * But for expressions that apply only on columns that have the same NDV - * as the key (implying that they are alternate keys) we can apply the - * ratio. So in the case of StoreSales - DateDim joins for predicate on the - * d_date column we can apply the scaling factor. - */ - double ndvScalingFactor = 1.0; - if ( isPKSideSimpleTree ) { - ndvScalingFactor = pkSide == 0 ? leftNDV/rightNDV : rightNDV / leftNDV; - } + int pkSide = leftIsKey ? 0 : 1; + boolean isPKSideSimpleTree = leftIsKey ? SimpleTreeOnJoinKey.check(false, left, lBitSet, mq) : + SimpleTreeOnJoinKey.check(false, right, rBitSet, mq); + double leftNDV = isPKSideSimpleTree ? mq.getDistinctRowCount(left, lBitSet, leftPred) : -1; + double rightNDV = isPKSideSimpleTree ? mq.getDistinctRowCount(right, rBitSet, rightPred) : -1; + + /* + * If the ndv of the PK - FK side don't match, and the PK side is a filter + * on the Key column then scale the NDV on the FK side. + * + * As described by Peter Boncz: http://databasearchitects.blogspot.com/ + * in such cases we can be off by a large margin in the Join cardinality + * estimate. The e.g. he provides is on the join of StoreSales and DateDim + * on the TPCDS dataset. Since the DateDim is populated for 20 years into + * the future, while the StoreSales only has 5 years worth of data, there + * are 40 times fewer distinct dates in StoreSales. + * + * In general it is hard to infer the range for the foreign key on an + * arbitrary expression. For e.g. the NDV for DayofWeek is the same + * irrespective of NDV on the number of unique days, whereas the + * NDV of Quarters has the same ratio as the NDV on the keys. + * + * But for expressions that apply only on columns that have the same NDV + * as the key (implying that they are alternate keys) we can apply the + * ratio. 
So in the case of StoreSales - DateDim joins for predicate on the + * d_date column we can apply the scaling factor. + */ + double ndvScalingFactor = 1.0; + if ( isPKSideSimpleTree ) { + ndvScalingFactor = pkSide == 0 ? leftNDV/rightNDV : rightNDV / leftNDV; + } if (pkSide == 0) { FKSideInfo fkInfo = new FKSideInfo(rightRowCount, @@ -293,9 +309,7 @@ public static PKFKRelationInfo analyzeJoinForPKFK(Join joinRel, RelMetadataQuery pkSelectivity); return new PKFKRelationInfo(1, fkInfo, pkInfo, ndvScalingFactor, isPKSideSimpleTree); - } - - if (pkSide == 1) { + } else { // pkSide == 1 FKSideInfo fkInfo = new FKSideInfo(leftRowCount, leftNDV); double pkSelectivity = pkSelectivity(joinRel, mq, false, right, rightRowCount); @@ -304,10 +318,114 @@ public static PKFKRelationInfo analyzeJoinForPKFK(Join joinRel, RelMetadataQuery joinRel.getJoinType().generatesNullsOnLeft() ? 1.0 : pkSelectivity); - return new PKFKRelationInfo(1, fkInfo, pkInfo, ndvScalingFactor, isPKSideSimpleTree); + return new PKFKRelationInfo(0, fkInfo, pkInfo, ndvScalingFactor, isPKSideSimpleTree); + } + } + + /* + * Analyzes the join for a PK-FK relationship based on declared constraints. Returns the PK-FK relation info paired with the residual (non PK-FK) join condition, which is null when there are no residual predicates; returns null for semijoins, for joins without condition conjuncts, or when no PK-FK relationship can be inferred. + */ + public static Pair constraintsBasedAnalyzeJoinForPKFK(Join join, RelMetadataQuery mq) { + + if (join instanceof SemiJoin) { + // TODO: Support semijoin + return null; } - return null; + final RelNode left = join.getInputs().get(0); + final RelNode right = join.getInputs().get(1); + + // 1) Split filters in conjuncts + final List condConjs = RelOptUtil.conjunctions( + join.getCondition()); + + if (condConjs.isEmpty()) { + // Bail out + return null; + } + + // 2) Classify filters depending on their provenance + final List joinFilters = new ArrayList<>(condConjs); + final List leftFilters = new ArrayList<>(); + final List rightFilters = new ArrayList<>(); + RelOptUtil.classifyFilters(join, joinFilters, join.getJoinType(),false, + !join.getJoinType().generatesNullsOnRight(), !join.getJoinType().generatesNullsOnLeft(), + joinFilters, leftFilters, rightFilters); + + // 3) Check if we are 
joining on PK-FK + final Triple, List> leftInputResult = + HiveRelOptUtil.extractPKFKJoin(join, joinFilters, false, mq); + final Triple, List> rightInputResult = + HiveRelOptUtil.extractPKFKJoin(join, joinFilters, true, mq); + if (leftInputResult == null && rightInputResult == null) { + // Nothing to do here, bail out + return null; + } + + boolean leftIsKey = (join.getJoinType() == JoinRelType.INNER || join.getJoinType() == JoinRelType.RIGHT) + && leftInputResult.getLeft(); + boolean rightIsKey = (join.getJoinType() == JoinRelType.INNER || join.getJoinType() == JoinRelType.LEFT) + && rightInputResult.getLeft(); + if (!leftIsKey && !rightIsKey) { + // Nothing to do here, bail out + return null; + } + final double leftRowCount = mq.getRowCount(left); + final double rightRowCount = mq.getRowCount(right); + if (leftIsKey && rightIsKey) { + if (rightRowCount < leftRowCount) { + leftIsKey = false; + } + } + final ImmutableBitSet lBitSet = leftIsKey ? leftInputResult.getMiddle().left : rightInputResult.getMiddle().left; + final ImmutableBitSet rBitSet = leftIsKey ? leftInputResult.getMiddle().right : rightInputResult.getMiddle().right; + final List residualFilters = leftIsKey ? leftInputResult.getRight() : rightInputResult.getRight(); + + // 4) Extract additional information on the PK-FK relationship + int pkSide = leftIsKey ? 0 : 1; + boolean isPKSideSimpleTree = leftIsKey ? SimpleTreeOnJoinKey.check(true, left, lBitSet, mq) : + SimpleTreeOnJoinKey.check(true, right, rBitSet, mq); + RexBuilder rexBuilder = join.getCluster().getRexBuilder(); + RexNode leftPred = RexUtil.composeConjunction( + rexBuilder, leftFilters, true); + RexNode rightPred = RexUtil.composeConjunction( + rexBuilder, rightFilters, true); + double leftNDV = isPKSideSimpleTree ? mq.getDistinctRowCount(left, lBitSet, leftPred) : -1; + double rightNDV = isPKSideSimpleTree ? 
mq.getDistinctRowCount(right, rBitSet, rightPred) : -1; + + // 5) Add the rest of operators back to the join filters + // and create residual condition + RexNode residualCond = residualFilters.isEmpty() ? null : + residualFilters.size() == 1 ? residualFilters.get(0) : + rexBuilder.makeCall(SqlStdOperatorTable.AND, residualFilters); + + // 6) Return result + if (pkSide == 0) { + FKSideInfo fkInfo = new FKSideInfo(rightRowCount, + rightNDV); + double pkSelectivity = pkSelectivity(join, mq, true, left, leftRowCount); + PKSideInfo pkInfo = new PKSideInfo(leftRowCount, + leftNDV, + join.getJoinType().generatesNullsOnRight() ? 1.0 : + pkSelectivity); + double ndvScalingFactor = isPKSideSimpleTree ? leftNDV/rightNDV : 1.0; + if (isPKSideSimpleTree) { + ndvScalingFactor = leftNDV/rightNDV; + } + return Pair.of(new PKFKRelationInfo(1, fkInfo, pkInfo, ndvScalingFactor, isPKSideSimpleTree), + residualCond); + } else { // pkSide == 1 + FKSideInfo fkInfo = new FKSideInfo(leftRowCount, + leftNDV); + double pkSelectivity = pkSelectivity(join, mq, false, right, rightRowCount); + PKSideInfo pkInfo = new PKSideInfo(rightRowCount, + rightNDV, + join.getJoinType().generatesNullsOnLeft() ? 1.0 : + pkSelectivity); + double ndvScalingFactor = isPKSideSimpleTree ? 
rightNDV/leftNDV : 1.0; + return Pair.of(new PKFKRelationInfo(0, fkInfo, pkInfo, ndvScalingFactor, isPKSideSimpleTree), + residualCond); + } } private static double pkSelectivity(Join joinRel, RelMetadataQuery mq, boolean leftChild, @@ -402,20 +520,22 @@ private static boolean isKey(ImmutableBitSet c, RelNode rel, RelMetadataQuery mq return new Pair(leftColIdx, rightColIdx); } - private static class IsSimpleTreeOnJoinKey extends RelVisitor { + private static class SimpleTreeOnJoinKey extends RelVisitor { - int joinKey; + boolean constraintsBased; + ImmutableBitSet joinKey; boolean simpleTree; RelMetadataQuery mq; - static boolean check(RelNode r, int joinKey, RelMetadataQuery mq) { - IsSimpleTreeOnJoinKey v = new IsSimpleTreeOnJoinKey(joinKey, mq); + static boolean check(boolean constraintsBased, RelNode r, ImmutableBitSet joinKey, RelMetadataQuery mq) { + SimpleTreeOnJoinKey v = new SimpleTreeOnJoinKey(constraintsBased, joinKey, mq); v.go(r); return v.simpleTree; } - IsSimpleTreeOnJoinKey(int joinKey, RelMetadataQuery mq) { + SimpleTreeOnJoinKey(boolean constraintsBased, ImmutableBitSet joinKey, RelMetadataQuery mq) { super(); + this.constraintsBased = constraintsBased; this.joinKey = joinKey; this.mq = mq; simpleTree = true; @@ -444,16 +564,23 @@ public void visit(RelNode node, int ordinal, RelNode parent) { } private boolean isSimple(Project project) { - RexNode r = project.getProjects().get(joinKey); - if (r instanceof RexInputRef) { - joinKey = ((RexInputRef) r).getIndex(); - return true; + ImmutableBitSet.Builder b = ImmutableBitSet.builder(); + for (int pos : joinKey) { + RexNode r = project.getProjects().get(pos); + if (!(r instanceof RexInputRef)) { + return false; + } + b.set(((RexInputRef) r).getIndex()); } - return false; + joinKey = b.build(); + return true; } private boolean isSimple(Filter filter, RelMetadataQuery mq) { ImmutableBitSet condBits = RelOptUtil.InputFinder.bits(filter.getCondition()); + if (constraintsBased) { + return 
mq.areColumnsUnique(filter, condBits); + } return isKey(condBits, filter, mq); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java index 575902d78d..7e9208229a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java @@ -98,8 +98,7 @@ private Double computeInnerJoinSelectivity(Join j, RelMetadataQuery mq, RexNode } catch (CalciteSemanticException e) { throw new RuntimeException(e); } - ImmutableMap.Builder colStatMapBuilder = ImmutableMap - .builder(); + ImmutableMap.Builder colStatMapBuilder = ImmutableMap.builder(); ImmutableMap colStatMap; int rightOffSet = j.getLeft().getRowType().getFieldCount(); diff --git a/ql/src/test/queries/clientpositive/perf/cbo_query24.q b/ql/src/test/queries/clientpositive/perf/cbo_query24.q index 02bcbafb7e..4be79f99a0 100644 --- a/ql/src/test/queries/clientpositive/perf/cbo_query24.q +++ b/ql/src/test/queries/clientpositive/perf/cbo_query24.q @@ -24,7 +24,8 @@ where ss_ticket_number = sr_ticket_number and ss_customer_sk = c_customer_sk and ss_item_sk = i_item_sk and ss_store_sk = s_store_sk - and c_birth_country = upper(ca_country) + and ca_address_sk = c_current_addr_sk + and c_birth_country <> upper(ca_country) and s_zip = ca_zip and s_market_id=7 group by c_last_name diff --git a/ql/src/test/queries/clientpositive/perf/query24.q b/ql/src/test/queries/clientpositive/perf/query24.q index 007d7ee415..8060402756 100644 --- a/ql/src/test/queries/clientpositive/perf/query24.q +++ b/ql/src/test/queries/clientpositive/perf/query24.q @@ -24,7 +24,8 @@ where ss_ticket_number = sr_ticket_number and ss_customer_sk = c_customer_sk and ss_item_sk = i_item_sk and ss_store_sk = s_store_sk - and c_birth_country = upper(ca_country) + and ca_address_sk = c_current_addr_sk + 
and c_birth_country <> upper(ca_country) and s_zip = ca_zip and s_market_id=7 group by c_last_name diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query13.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query13.q.out index 02966e4f47..ce7a40fa21 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query13.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query13.q.out @@ -115,6 +115,7 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage +<<<<<<< HEAD Map 8 <- Reducer 10 (BROADCAST_EDGE), Reducer 12 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE) Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) @@ -125,17 +126,35 @@ Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Map 13 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) Reducer 7 <- Map 1 (CUSTOM_SIMPLE_EDGE) +======= +Map 10 <- Reducer 12 (BROADCAST_EDGE), Reducer 5 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 3 <- Map 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) +Reducer 5 <- Map 1 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Map 10 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) +Reducer 8 <- Map 11 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Map 6 (CUSTOM_SIMPLE_EDGE) +>>>>>>> d59ddb6e0a... cost model changes Stage-0 Fetch Operator limit:-1 Stage-1 +<<<<<<< HEAD Reducer 6 vectorized File Output Operator [FS_135] Select Operator [SEL_134] (rows=1 width=344) +======= + Reducer 4 vectorized + File Output Operator [FS_131] + Select Operator [SEL_130] (rows=1 width=344) +>>>>>>> d59ddb6e0a... 
cost model changes Output:["_col0","_col1","_col2","_col3"] Group By Operator [GBY_133] (rows=1 width=256) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)"] +<<<<<<< HEAD <-Reducer 5 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_31] Group By Operator [GBY_30] (rows=1 width=256) @@ -146,9 +165,22 @@ Stage-0 predicate:((_col22 and _col23 and _col11 and _col15) or (_col24 and _col25 and _col12 and _col16) or (_col26 and _col27 and _col13 and _col16)) Merge Join Operator [MERGEJOIN_97] (rows=218403 width=44) Conds:RS_25._col2=RS_124._col0(Inner),Output:["_col5","_col6","_col7","_col11","_col12","_col13","_col15","_col16","_col22","_col23","_col24","_col25","_col26","_col27"] +======= + <-Reducer 3 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_32] + Group By Operator [GBY_31] (rows=1 width=256) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col3)","count(_col3)","sum(_col4)","count(_col4)","sum(_col5)","count(_col5)"] + Select Operator [SEL_30] (rows=100857 width=24) + Output:["_col3","_col4","_col5"] + Filter Operator [FIL_29] (rows=100857 width=24) + predicate:((_col10 and _col6) or (_col11 and _col7) or (_col12 and _col8)) + Merge Join Operator [MERGEJOIN_98] (rows=134477 width=24) + Conds:RS_26._col2=RS_128._col0(Inner),Output:["_col3","_col4","_col5","_col6","_col7","_col8","_col10","_col11","_col12"] +>>>>>>> d59ddb6e0a... 
cost model changes <-Map 13 [SIMPLE_EDGE] vectorized SHUFFLE [RS_124] PartitionCols:_col0 +<<<<<<< HEAD Select Operator [SEL_123] (rows=265971 width=28) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] Filter Operator [FIL_122] (rows=265971 width=183) @@ -252,4 +284,100 @@ Stage-0 Select Operator [SEL_101] (rows=652 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_99] +======= + Select Operator [SEL_127] (rows=20000000 width=16) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_126] (rows=20000000 width=187) + predicate:(ca_country = 'United States') + TableScan [TS_20] (rows=40000000 width=187) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_country"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_26] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_97] (rows=268953 width=12) + Conds:RS_101._col0=RS_24._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + <-Map 1 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_101] + PartitionCols:_col0 + Select Operator [SEL_100] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_99] (rows=652 width=8) + predicate:(d_year = 2001) + TableScan [TS_0] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_24] + PartitionCols:_col0 + Select Operator [SEL_19] (rows=753243 width=12) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Filter Operator [FIL_18] (rows=753243 width=56) + predicate:((_col1 and _col2 and _col17 and _col21) or (_col3 and _col4 and _col18 and _col22) or (_col5 and _col6 and _col19 and _col22)) + Merge Join Operator [MERGEJOIN_96] (rows=4017306 width=56) + 
Conds:RS_15._col9=RS_115._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col21","_col22"] + <-Map 11 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_115] + PartitionCols:_col0 + Select Operator [SEL_114] (rows=1309 width=12) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_113] (rows=1309 width=8) + predicate:(hd_dep_count) IN (3, 1) + TableScan [TS_9] (rows=7200 width=8) + default@household_demographics,household_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_dep_count"] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_15] + PartitionCols:_col9 + Merge Join Operator [MERGEJOIN_95] (rows=22096715 width=213) + Conds:RS_107._col0=RS_125._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"] + <-Map 6 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_107] + PartitionCols:_col0 + Select Operator [SEL_106] (rows=265971 width=28) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Filter Operator [FIL_105] (rows=265971 width=183) + predicate:((cd_education_status) IN ('4 yr Degree', 'Primary', 'Advanced Degree') and (cd_marital_status) IN ('M', 'D', 'U')) + TableScan [TS_3] (rows=1861800 width=183) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_125] + PartitionCols:_col1 + Select Operator [SEL_124] (rows=152520430 width=257) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] + Filter Operator [FIL_123] (rows=152520430 width=450) + predicate:((ss_cdemo_sk BETWEEN DynamicValue(RS_12_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_12_customer_demographics_cd_demo_sk_max) and 
in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_12_customer_demographics_cd_demo_sk_bloom_filter))) and (ss_hdemo_sk BETWEEN DynamicValue(RS_16_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_16_household_demographics_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_16_household_demographics_hd_demo_sk_bloom_filter))) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and (ss_sold_date_sk BETWEEN DynamicValue(RS_23_date_dim_d_date_sk_min) AND DynamicValue(RS_23_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_23_date_dim_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_6] (rows=575995635 width=450) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_quantity","ss_sales_price","ss_ext_sales_price","ss_ext_wholesale_cost","ss_net_profit"] + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_120] + Group By Operator [GBY_119] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_118] + Group By Operator [GBY_117] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_116] (rows=1309 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_114] + <-Reducer 5 [BROADCAST_EDGE] vectorized + BROADCAST [RS_122] + Group By Operator [GBY_121] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized + 
PARTITION_ONLY_SHUFFLE [RS_104] + Group By Operator [GBY_103] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_102] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_100] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_112] + Group By Operator [GBY_111] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_110] + Group By Operator [GBY_109] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_108] (rows=265971 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_106] +>>>>>>> d59ddb6e0a... cost model changes diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query17.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query17.q.out index e796101e45..e97dafc553 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query17.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query17.q.out @@ -103,6 +103,7 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage +<<<<<<< HEAD Map 1 <- Reducer 12 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) Map 19 <- Reducer 14 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE) Reducer 10 <- Map 19 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) @@ -120,11 +121,27 @@ Reducer 5 <- Map 21 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) +======= +Map 12 <- Reducer 11 (BROADCAST_EDGE) +Map 16 <- Reducer 8 (BROADCAST_EDGE) +Reducer 10 <- Map 5 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Map 5 (CUSTOM_SIMPLE_EDGE) +Reducer 13 <- Map 12 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) +Reducer 14 <- Map 16 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 7 <- Map 17 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Map 5 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 5 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +>>>>>>> d59ddb6e0a... 
cost model changes Stage-0 Fetch Operator limit:100 Stage-1 +<<<<<<< HEAD Reducer 7 vectorized File Output Operator [FS_259] Limit [LIM_258] (rows=100 width=466) @@ -136,6 +153,19 @@ Stage-0 Select Operator [SEL_255] (rows=4815969644 width=466) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] Group By Operator [GBY_254] (rows=4815969644 width=466) +======= + Reducer 4 vectorized + File Output Operator [FS_253] + Limit [LIM_252] (rows=100 width=466) + Number of rows:100 + Select Operator [SEL_251] (rows=13317347 width=466) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_250] + Select Operator [SEL_249] (rows=13317347 width=466) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + Group By Operator [GBY_248] (rows=13317347 width=466) +>>>>>>> d59ddb6e0a... 
cost model changes Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","count(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)","sum(VALUE._col7)","count(VALUE._col8)","sum(VALUE._col9)","sum(VALUE._col10)","sum(VALUE._col11)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_48] @@ -151,6 +181,7 @@ Stage-0 <-Map 21 [SIMPLE_EDGE] vectorized SHUFFLE [RS_253] PartitionCols:_col0 +<<<<<<< HEAD Select Operator [SEL_252] (rows=1704 width=90) Output:["_col0","_col1"] TableScan [TS_31] (rows=1704 width=90) @@ -316,4 +347,127 @@ Stage-0 Select Operator [SEL_219] (rows=101 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_215] +======= + Select Operator [SEL_216] (rows=462000 width=288) + Output:["_col0","_col1","_col2"] + TableScan [TS_0] (rows=462000 width=288) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_item_desc"] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_44] + PartitionCols:_col0 + Select Operator [SEL_42] (rows=13317347 width=97) + Output:["_col0","_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_214] (rows=13317347 width=97) + Conds:RS_39._col2=RS_247._col0(Inner),Output:["_col1","_col3","_col4","_col6","_col8"] + <-Map 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_247] + PartitionCols:_col0 + Select Operator [SEL_246] (rows=1704 width=90) + Output:["_col0","_col1"] + TableScan [TS_34] (rows=1704 width=90) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_state"] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_39] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_213] (rows=13317347 width=11) + Conds:RS_224._col0=RS_37._col4(Inner),Output:["_col1","_col2","_col3","_col4","_col6"] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_224] + PartitionCols:_col0 + Select Operator 
[SEL_221] (rows=3652 width=4) + Output:["_col0"] + Filter Operator [FIL_218] (rows=3652 width=94) + predicate:(d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') + TableScan [TS_2] (rows=73049 width=94) + default@date_dim,d3,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_quarter_name"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_37] + PartitionCols:_col4 + Select Operator [SEL_33] (rows=266379760 width=23) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_212] (rows=266379760 width=23) + Conds:RS_30._col4=RS_228._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7"] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_228] + PartitionCols:_col0 + Select Operator [SEL_223] (rows=3652 width=4) + Output:["_col0"] + Filter Operator [FIL_220] (rows=3652 width=94) + predicate:(d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') + Please refer to the previous TableScan [TS_2] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_30] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_211] (rows=5328251731 width=27) + Conds:RS_27._col0=RS_226._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_226] + PartitionCols:_col0 + Select Operator [SEL_222] (rows=101 width=4) + Output:["_col0"] + Filter Operator [FIL_219] (rows=101 width=94) + predicate:(d_quarter_name = '2000Q1') + Please refer to the previous TableScan [TS_2] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_27] + PartitionCols:_col0 + Select Operator [SEL_20] (rows=96331893945 width=31) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_210] (rows=96331893945 width=31) + Conds:RS_17._col8, _col7=RS_245._col1, _col2(Inner),Output:["_col0","_col1","_col3","_col5","_col6","_col10","_col11","_col14"] + <-Map 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_245] + PartitionCols:_col1, _col2 + Select Operator [SEL_244] (rows=285117831 width=15) + 
Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_243] (rows=285117831 width=15) + predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_36_d3_d_date_sk_min) AND DynamicValue(RS_36_d3_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_36_d3_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_11] (rows=287989836 width=15) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_quantity"] + <-Reducer 8 [BROADCAST_EDGE] vectorized + BROADCAST [RS_242] + Group By Operator [GBY_241] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 5 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_231] + Group By Operator [GBY_229] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_225] (rows=3652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_221] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_17] + PartitionCols:_col8, _col7 + Merge Join Operator [MERGEJOIN_209] (rows=478292911 width=31) + Conds:RS_237._col2, _col1, _col4=RS_240._col2, _col1, _col3(Inner),Output:["_col0","_col1","_col3","_col5","_col6","_col7","_col8","_col10"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_237] + PartitionCols:_col2, _col1, _col4 + Select Operator [SEL_236] (rows=501694138 width=23) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_235] (rows=501694138 width=23) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_28_d1_d_date_sk_min) AND DynamicValue(RS_28_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_28_d1_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + 
TableScan [TS_5] (rows=575995635 width=23) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_quantity"] + <-Reducer 11 [BROADCAST_EDGE] vectorized + BROADCAST [RS_234] + Group By Operator [GBY_233] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 5 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_232] + Group By Operator [GBY_230] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_227] (rows=101 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_222] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_240] + PartitionCols:_col2, _col1, _col3 + Select Operator [SEL_239] (rows=53632139 width=19) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_238] (rows=53632139 width=19) + predicate:(sr_customer_sk is not null and sr_returned_date_sk is not null) + TableScan [TS_8] (rows=57591150 width=19) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number","sr_return_quantity"] +>>>>>>> d59ddb6e0a... cost model changes diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query24.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query24.q.out index 7fbbc0518e..49ced04627 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query24.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query24.q.out @@ -114,6 +114,7 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage +<<<<<<< HEAD Map 1 <- Reducer 10 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE) Map 24 <- Reducer 20 (BROADCAST_EDGE) Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) @@ -133,11 +134,33 @@ Reducer 5 <- Map 22 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Map 23 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) Reducer 8 <- Reducer 15 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) +======= +Map 24 <- Reducer 19 (BROADCAST_EDGE) +Map 8 <- Reducer 14 (BROADCAST_EDGE), Reducer 22 (BROADCAST_EDGE) +Reducer 10 <- Map 20 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Map 21 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Map 23 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 15 <- Map 13 (SIMPLE_EDGE), Map 24 (SIMPLE_EDGE) +Reducer 16 <- Map 20 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 17 <- Map 23 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) +Reducer 18 <- Map 21 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) +Reducer 19 <- Map 13 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 22 <- Map 21 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) +Reducer 5 <- Map 1 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 13 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +>>>>>>> d59ddb6e0a... 
cost model changes Stage-0 Fetch Operator limit:-1 Stage-1 +<<<<<<< HEAD Reducer 8 File Output Operator [FS_88] Select Operator [SEL_87] (rows=78393744 width=380) @@ -240,6 +263,123 @@ Stage-0 <-Reducer 20 [BROADCAST_EDGE] vectorized BROADCAST [RS_330] Group By Operator [GBY_329] (rows=1 width=12) +======= + Reducer 4 + File Output Operator [FS_92] + Select Operator [SEL_91] (rows=27708538 width=380) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_90] (rows=27708538 width=492) + predicate:(_col3 > _col4) + Merge Join Operator [MERGEJOIN_291] (rows=83125614 width=492) + Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] + <-Reducer 3 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_332] + Select Operator [SEL_331] (rows=83125614 width=380) + Output:["_col0","_col1","_col2","_col3"] + Group By Operator [GBY_330] (rows=83125614 width=380) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col9)"],keys:_col1, _col7, _col8 + Select Operator [SEL_329] (rows=309360422049 width=843) + Output:["_col1","_col7","_col8","_col9"] + Group By Operator [GBY_328] (rows=309360422049 width=843) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8 + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_36] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_35] (rows=309360422049 width=843) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(_col3)"],keys:_col4, _col11, _col12, _col0, _col5, _col7, _col8, _col9, _col10 + Merge Join Operator [MERGEJOIN_285] (rows=309360422049 width=843) + Conds:RS_294._col1, _col2=RS_32._col3, _col10(Inner),Output:["_col0","_col3","_col4","_col5","_col7","_col8","_col9","_col10","_col11","_col12"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_294] + PartitionCols:_col1, 
_col2 + Select Operator [SEL_293] (rows=40000000 width=359) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_292] (rows=40000000 width=272) + predicate:(ca_zip is not null and upper(ca_country) is not null) + TableScan [TS_0] (rows=40000000 width=272) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_state","ca_zip","ca_country"] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE [RS_32] + PartitionCols:_col3, _col10 + Select Operator [SEL_30] (rows=78430601 width=905) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] + Merge Join Operator [MERGEJOIN_284] (rows=78430601 width=905) + Conds:RS_27._col1=RS_326._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col7","_col8","_col9","_col10","_col12","_col13","_col14"] + <-Map 23 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_326] + PartitionCols:_col0 + Select Operator [SEL_325] (rows=80000000 width=276) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_324] (rows=80000000 width=276) + predicate:c_birth_country is not null + TableScan [TS_21] (rows=80000000 width=276) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_first_name","c_last_name","c_birth_country"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_27] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_283] (rows=78430601 width=636) + Conds:RS_24._col0=RS_312._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col7","_col8","_col9","_col10"] + <-Map 21 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_312] + PartitionCols:_col0 + Select Operator [SEL_310] (rows=7000 width=295) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_308] (rows=7000 width=384) + predicate:(i_color = 'orchid') + TableScan [TS_18] (rows=462000 width=384) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_current_price","i_size","i_color","i_units","i_manager_id"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_24] 
+ PartitionCols:_col0 + Select Operator [SEL_17] (rows=537799796 width=377) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_282] (rows=537799796 width=377) + Conds:RS_14._col0, _col3=RS_322._col0, _col1(Inner),Output:["_col0","_col1","_col4","_col6","_col7","_col8"] + <-Map 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_322] + PartitionCols:_col0, _col1 + Select Operator [SEL_321] (rows=57591150 width=8) + Output:["_col0","_col1"] + TableScan [TS_9] (rows=57591150 width=8) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_ticket_number"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_14] + PartitionCols:_col0, _col3 + Merge Join Operator [MERGEJOIN_281] (rows=385681992 width=379) + Conds:RS_320._col2=RS_298._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col6","_col7","_col8"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_298] + PartitionCols:_col0 + Select Operator [SEL_297] (rows=155 width=267) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_296] (rows=155 width=271) + predicate:((s_market_id = 7) and s_zip is not null) + TableScan [TS_6] (rows=1704 width=270) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_market_id","s_state","s_zip"] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_320] + PartitionCols:_col2 + Select Operator [SEL_319] (rows=525333486 width=122) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_318] (rows=525333486 width=122) + predicate:((ss_item_sk BETWEEN DynamicValue(RS_25_item_i_item_sk_min) AND DynamicValue(RS_25_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_25_item_i_item_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_12_store_s_store_sk_min) AND DynamicValue(RS_12_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_12_store_s_store_sk_bloom_filter))) and ss_customer_sk is not null and ss_store_sk is not null) + TableScan 
[TS_3] (rows=575995635 width=122) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_sales_price"] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_307] + Group By Operator [GBY_306] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_304] + Group By Operator [GBY_302] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_299] (rows=155 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_297] + <-Reducer 22 [BROADCAST_EDGE] vectorized + BROADCAST [RS_317] + Group By Operator [GBY_316] (rows=1 width=12) +>>>>>>> d59ddb6e0a... cost model changes Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized SHUFFLE [RS_307] @@ -249,6 +389,7 @@ Stage-0 Output:["_col0"] Please refer to the previous Select Operator [SEL_299] <-Reducer 7 [CUSTOM_SIMPLE_EDGE] vectorized +<<<<<<< HEAD PARTITION_ONLY_SHUFFLE [RS_328] Select Operator [SEL_327] (rows=235181232 width=380) Output:["_col0","_col1","_col2","_col3"] @@ -340,4 +481,92 @@ Stage-0 Select Operator [SEL_301] (rows=155 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_299] +======= + PARTITION_ONLY_SHUFFLE [RS_344] + Select Operator [SEL_343] (rows=1 width=112) + Output:["_col0"] + Group By Operator [GBY_342] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_341] + Group By Operator [GBY_340] (rows=1 width=120) + 
Output:["_col0","_col1"],aggregations:["sum(_col10)","count(_col10)"] + Select Operator [SEL_339] (rows=2121289008973 width=932) + Output:["_col10"] + Group By Operator [GBY_338] (rows=2121289008973 width=932) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8, KEY._col9 + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_79] + PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Group By Operator [GBY_78] (rows=2121289008973 width=932) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(_col3)"],keys:_col0, _col4, _col5, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Merge Join Operator [MERGEJOIN_290] (rows=2121289008973 width=932) + Conds:RS_295._col1, _col2=RS_75._col3, _col11(Inner),Output:["_col0","_col3","_col4","_col5","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_295] + PartitionCols:_col1, _col2 + Please refer to the previous Select Operator [SEL_293] + <-Reducer 18 [SIMPLE_EDGE] + SHUFFLE [RS_75] + PartitionCols:_col3, _col11 + Select Operator [SEL_73] (rows=537799796 width=1023) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Merge Join Operator [MERGEJOIN_289] (rows=537799796 width=1023) + Conds:RS_70._col4=RS_311._col0(Inner),Output:["_col1","_col2","_col3","_col6","_col7","_col8","_col9","_col11","_col12","_col13","_col14","_col15"] + <-Map 21 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_311] + PartitionCols:_col0 + Select Operator [SEL_309] (rows=462000 width=384) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Please refer to the previous TableScan [TS_18] + <-Reducer 17 [SIMPLE_EDGE] + SHUFFLE [RS_70] + PartitionCols:_col4 + 
Merge Join Operator [MERGEJOIN_288] (rows=537799796 width=646) + Conds:RS_327._col0=RS_68._col1(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col8","_col9"] + <-Map 23 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_327] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_325] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_68] + PartitionCols:_col1 + Select Operator [SEL_64] (rows=537799796 width=377) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_287] (rows=537799796 width=377) + Conds:RS_61._col0, _col3=RS_323._col0, _col1(Inner),Output:["_col0","_col1","_col4","_col6","_col7","_col8"] + <-Map 20 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_323] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_321] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_61] + PartitionCols:_col0, _col3 + Merge Join Operator [MERGEJOIN_286] (rows=385681992 width=379) + Conds:RS_337._col2=RS_300._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col6","_col7","_col8"] + <-Map 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_300] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_297] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_337] + PartitionCols:_col2 + Select Operator [SEL_336] (rows=525333486 width=122) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_335] (rows=525333486 width=122) + predicate:((ss_store_sk BETWEEN DynamicValue(RS_59_store_s_store_sk_min) AND DynamicValue(RS_59_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_59_store_s_store_sk_bloom_filter))) and ss_customer_sk is not null and ss_store_sk is not null) + TableScan [TS_50] (rows=575995635 width=122) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_sales_price"] + <-Reducer 19 [BROADCAST_EDGE] vectorized + BROADCAST [RS_334] + Group By Operator [GBY_333] (rows=1 
width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_305] + Group By Operator [GBY_303] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_301] (rows=155 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_297] +>>>>>>> d59ddb6e0a... cost model changes diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query25.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query25.q.out index a353c6a128..c8031bfabc 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query25.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query25.q.out @@ -109,6 +109,7 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage +<<<<<<< HEAD Map 1 <- Reducer 12 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) Map 18 <- Reducer 14 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE) Reducer 10 <- Map 18 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) @@ -126,11 +127,27 @@ Reducer 5 <- Map 21 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) +======= +Map 12 <- Reducer 11 (BROADCAST_EDGE) +Map 16 <- Reducer 8 (BROADCAST_EDGE) +Reducer 10 <- Map 5 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Map 5 (CUSTOM_SIMPLE_EDGE) +Reducer 13 <- Map 12 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) +Reducer 14 <- Map 16 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Reducer 6 <- Map 5 
(SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 7 <- Map 17 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Map 5 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 5 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +>>>>>>> d59ddb6e0a... cost model changes Stage-0 Fetch Operator limit:100 Stage-1 +<<<<<<< HEAD Reducer 7 vectorized File Output Operator [FS_259] Limit [LIM_258] (rows=100 width=808) @@ -140,6 +157,17 @@ Stage-0 <-Reducer 6 [SIMPLE_EDGE] vectorized SHUFFLE [RS_256] Group By Operator [GBY_255] (rows=4248052806 width=808) +======= + Reducer 4 vectorized + File Output Operator [FS_251] + Limit [LIM_250] (rows=100 width=808) + Number of rows:100 + Select Operator [SEL_249] (rows=88265283 width=808) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + <-Reducer 3 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_248] + Group By Operator [GBY_247] (rows=88265283 width=808) +>>>>>>> d59ddb6e0a... cost model changes Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_47] @@ -155,6 +183,7 @@ Stage-0 PartitionCols:_col0 Select Operator [SEL_253] (rows=1704 width=192) Output:["_col0","_col1","_col2"] +<<<<<<< HEAD TableScan [TS_31] (rows=1704 width=192) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id","s_store_name"] <-Reducer 4 [SIMPLE_EDGE] @@ -318,4 +347,125 @@ Stage-0 Select Operator [SEL_220] (rows=50 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_216] +======= + TableScan [TS_0] (rows=462000 width=288) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_item_desc"] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_44] + PartitionCols:_col0 + Select Operator [SEL_42] (rows=88265283 width=496) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_213] (rows=88265283 width=496) + 
Conds:RS_39._col2=RS_246._col0(Inner),Output:["_col1","_col3","_col4","_col6","_col8","_col9"] + <-Map 17 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_246] + PartitionCols:_col0 + Select Operator [SEL_245] (rows=1704 width=192) + Output:["_col0","_col1","_col2"] + TableScan [TS_34] (rows=1704 width=192) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id","s_store_name"] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_39] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_212] (rows=88265283 width=311) + Conds:RS_223._col0=RS_37._col4(Inner),Output:["_col1","_col2","_col3","_col4","_col6"] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_223] + PartitionCols:_col0 + Select Operator [SEL_220] (rows=351 width=4) + Output:["_col0"] + Filter Operator [FIL_217] (rows=351 width=12) + predicate:((d_year = 2000) and d_moy BETWEEN 4 AND 10) + TableScan [TS_2] (rows=73049 width=12) + default@date_dim,d3,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_37] + PartitionCols:_col4 + Select Operator [SEL_33] (rows=462456048 width=341) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_211] (rows=462456048 width=341) + Conds:RS_30._col4=RS_227._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7"] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_227] + PartitionCols:_col0 + Select Operator [SEL_222] (rows=351 width=4) + Output:["_col0"] + Filter Operator [FIL_219] (rows=351 width=12) + predicate:((d_year = 2000) and d_moy BETWEEN 4 AND 10) + Please refer to the previous TableScan [TS_2] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_30] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_210] (rows=2637748473 width=350) + Conds:RS_27._col0=RS_225._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_225] + PartitionCols:_col0 + Select Operator [SEL_221] (rows=50 width=4) + 
Output:["_col0"] + Filter Operator [FIL_218] (rows=50 width=12) + predicate:((d_moy = 4) and (d_year = 2000)) + Please refer to the previous TableScan [TS_2] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_27] + PartitionCols:_col0 + Select Operator [SEL_20] (rows=96331893945 width=355) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_209] (rows=96331893945 width=355) + Conds:RS_17._col8, _col7=RS_244._col1, _col2(Inner),Output:["_col0","_col1","_col3","_col5","_col6","_col10","_col11","_col14"] + <-Map 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_244] + PartitionCols:_col1, _col2 + Select Operator [SEL_243] (rows=285117831 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_242] (rows=285117831 width=123) + predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_36_d3_d_date_sk_min) AND DynamicValue(RS_36_d3_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_36_d3_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_11] (rows=287989836 width=123) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_net_profit"] + <-Reducer 8 [BROADCAST_EDGE] vectorized + BROADCAST [RS_241] + Group By Operator [GBY_240] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 5 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_230] + Group By Operator [GBY_228] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_224] (rows=351 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_220] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_17] + PartitionCols:_col8, _col7 + Merge Join Operator [MERGEJOIN_208] (rows=478292911 width=241) 
+ Conds:RS_236._col2, _col1, _col4=RS_239._col2, _col1, _col3(Inner),Output:["_col0","_col1","_col3","_col5","_col6","_col7","_col8","_col10"] + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_236] + PartitionCols:_col2, _col1, _col4 + Select Operator [SEL_235] (rows=501694138 width=126) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_234] (rows=501694138 width=126) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_28_d1_d_date_sk_min) AND DynamicValue(RS_28_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_28_d1_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_5] (rows=575995635 width=126) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_net_profit"] + <-Reducer 11 [BROADCAST_EDGE] vectorized + BROADCAST [RS_233] + Group By Operator [GBY_232] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 5 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_231] + Group By Operator [GBY_229] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_226] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_221] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_239] + PartitionCols:_col2, _col1, _col3 + Select Operator [SEL_238] (rows=53632139 width=123) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_237] (rows=53632139 width=123) + predicate:(sr_customer_sk is not null and sr_returned_date_sk is not null) + TableScan [TS_8] (rows=57591150 width=123) + 
default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number","sr_net_loss"] +>>>>>>> d59ddb6e0a... cost model changes diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query29.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query29.q.out index 715f1cef53..4e9375d040 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query29.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query29.q.out @@ -205,6 +205,7 @@ Stage-0 <-Map 22 [SIMPLE_EDGE] vectorized SHUFFLE [RS_243] PartitionCols:_col0 +<<<<<<< HEAD Select Operator [SEL_242] (rows=462000 width=288) Output:["_col0","_col1","_col2"] TableScan [TS_22] (rows=462000 width=288) @@ -325,4 +326,102 @@ Stage-0 Select Operator [SEL_147] (rows=7638375 width=6) Output:["_col0"] Please refer to the previous Merge Join Operator [MERGEJOIN_197] +======= + Select Operator [SEL_218] (rows=1957 width=4) + Output:["_col0"] + Filter Operator [FIL_217] (rows=1957 width=8) + predicate:(d_year) IN (1999, 2000, 2001) + TableScan [TS_2] (rows=73049 width=8) + default@date_dim,d3,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_37] + PartitionCols:_col4 + Select Operator [SEL_33] (rows=264825249 width=23) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_211] (rows=264825249 width=23) + Conds:RS_30._col4=RS_229._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7"] + <-Map 16 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_229] + PartitionCols:_col0 + Select Operator [SEL_226] (rows=201 width=4) + Output:["_col0"] + Filter Operator [FIL_224] (rows=201 width=12) + predicate:((d_year = 1999) and d_moy BETWEEN 4 AND 7) + TableScan [TS_21] (rows=73049 width=12) + default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Reducer 12 [SIMPLE_EDGE] + SHUFFLE 
[RS_30] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_210] (rows=2637748473 width=27) + Conds:RS_27._col0=RS_227._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Map 16 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_227] + PartitionCols:_col0 + Select Operator [SEL_225] (rows=50 width=4) + Output:["_col0"] + Filter Operator [FIL_223] (rows=50 width=12) + predicate:((d_moy = 4) and (d_year = 1999)) + Please refer to the previous TableScan [TS_21] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_27] + PartitionCols:_col0 + Select Operator [SEL_20] (rows=96331893945 width=31) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_209] (rows=96331893945 width=31) + Conds:RS_17._col8, _col7=RS_244._col1, _col2(Inner),Output:["_col0","_col1","_col3","_col5","_col6","_col10","_col11","_col14"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_244] + PartitionCols:_col1, _col2 + Select Operator [SEL_243] (rows=285117831 width=15) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_242] (rows=285117831 width=15) + predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_36_d3_d_date_sk_min) AND DynamicValue(RS_36_d3_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_36_d3_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_11] (rows=287989836 width=15) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_quantity"] + <-Reducer 8 [BROADCAST_EDGE] vectorized + BROADCAST [RS_241] + Group By Operator [GBY_240] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 5 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_222] + Group By Operator [GBY_221] (rows=1 width=12) + 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_220] (rows=1957 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_218] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_17] + PartitionCols:_col8, _col7 + Merge Join Operator [MERGEJOIN_208] (rows=478292911 width=31) + Conds:RS_236._col2, _col1, _col4=RS_239._col2, _col1, _col3(Inner),Output:["_col0","_col1","_col3","_col5","_col6","_col7","_col8","_col10"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_239] + PartitionCols:_col2, _col1, _col3 + Select Operator [SEL_238] (rows=53632139 width=19) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_237] (rows=53632139 width=19) + predicate:(sr_customer_sk is not null and sr_returned_date_sk is not null) + TableScan [TS_8] (rows=57591150 width=19) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number","sr_return_quantity"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_236] + PartitionCols:_col2, _col1, _col4 + Select Operator [SEL_235] (rows=501694138 width=23) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_234] (rows=501694138 width=23) + predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_28_d1_d_date_sk_min) AND DynamicValue(RS_28_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_28_d1_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_5] (rows=575995635 width=23) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_quantity"] + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_233] + Group By Operator [GBY_232] (rows=1 width=12) + 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_231] + Group By Operator [GBY_230] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_228] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_225] +>>>>>>> d59ddb6e0a... cost model changes diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query48.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query48.q.out index b84dfce073..c8a6c4485a 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query48.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query48.q.out @@ -143,6 +143,7 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. Vertex dependency in root stage +<<<<<<< HEAD Map 7 <- Reducer 11 (BROADCAST_EDGE), Reducer 6 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) @@ -150,12 +151,22 @@ Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Map 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) Reducer 6 <- Map 1 (CUSTOM_SIMPLE_EDGE) +======= +Map 5 <- Reducer 11 (BROADCAST_EDGE), Reducer 4 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 7 <- Map 10 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +>>>>>>> d59ddb6e0a... 
cost model changes Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 +<<<<<<< HEAD Reducer 5 vectorized File Output Operator [FS_102] Group By Operator [GBY_101] (rows=1 width=8) @@ -209,9 +220,65 @@ Stage-0 default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] <-Map 7 [SIMPLE_EDGE] vectorized SHUFFLE [RS_100] +======= + Reducer 3 vectorized + File Output Operator [FS_103] + Group By Operator [GBY_102] (rows=1 width=8) + Output:["_col0"],aggregations:["sum(VALUE._col0)"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_25] + Group By Operator [GBY_24] (rows=1 width=8) + Output:["_col0"],aggregations:["sum(_col2)"] + Merge Join Operator [MERGEJOIN_74] (rows=344227 width=0) + Conds:RS_77._col0=RS_21._col0(Inner),Output:["_col2"] + <-Map 1 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_77] + PartitionCols:_col0 + Select Operator [SEL_76] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_75] (rows=652 width=8) + predicate:(d_year = 1998) + TableScan [TS_0] (rows=73049 width=8) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col0 + Select Operator [SEL_19] (rows=964059 width=0) + Output:["_col0","_col1"] + Filter Operator [FIL_18] (rows=964059 width=24) + predicate:((_col10 and _col5) or (_col11 and _col6) or (_col9 and _col4)) + Merge Join Operator [MERGEJOIN_73] (rows=1285415 width=24) + Conds:RS_15._col2=RS_91._col0(Inner),Output:["_col0","_col3","_col4","_col5","_col6","_col9","_col10","_col11"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_91] + PartitionCols:_col0 + Select Operator [SEL_90] (rows=20000000 width=16) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_89] (rows=20000000 width=187) + predicate:(ca_country = 'United States') + TableScan [TS_9] (rows=40000000 width=187) + 
default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_country"] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_15] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_72] (rows=2570829 width=12) + Conds:RS_101._col1=RS_83._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_83] + PartitionCols:_col0 + Select Operator [SEL_82] (rows=29552 width=4) + Output:["_col0"] + Filter Operator [FIL_81] (rows=29552 width=183) + predicate:((cd_education_status = '4 yr Degree') and (cd_marital_status = 'M')) + TableScan [TS_6] (rows=1861800 width=183) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] + <-Map 5 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_101] +>>>>>>> d59ddb6e0a... cost model changes PartitionCols:_col1 Select Operator [SEL_99] (rows=53235296 width=27) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] +<<<<<<< HEAD Filter Operator [FIL_98] (rows=53235296 width=233) predicate:((ss_addr_sk BETWEEN DynamicValue(RS_19_customer_address_ca_address_sk_min) AND DynamicValue(RS_19_customer_address_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, DynamicValue(RS_19_customer_address_ca_address_sk_bloom_filter))) and (ss_cdemo_sk BETWEEN DynamicValue(RS_12_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_12_customer_demographics_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_12_customer_demographics_cd_demo_sk_bloom_filter))) and (ss_net_profit BETWEEN 0 AND 2000 or ss_net_profit BETWEEN 150 AND 3000 or ss_net_profit BETWEEN 50 AND 25000) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and (ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, 
DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) TableScan [TS_3] (rows=575995635 width=233) @@ -236,9 +303,36 @@ Stage-0 Group By Operator [GBY_78] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] Select Operator [SEL_77] (rows=29552 width=4) +======= + Filter Operator [FIL_99] (rows=159705893 width=233) + predicate:((ss_addr_sk BETWEEN DynamicValue(RS_16_customer_address_ca_address_sk_min) AND DynamicValue(RS_16_customer_address_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, DynamicValue(RS_16_customer_address_ca_address_sk_bloom_filter))) and (ss_cdemo_sk BETWEEN DynamicValue(RS_13_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_13_customer_demographics_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_13_customer_demographics_cd_demo_sk_bloom_filter))) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and (ss_sold_date_sk BETWEEN DynamicValue(RS_20_date_dim_d_date_sk_min) AND DynamicValue(RS_20_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_20_date_dim_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_3] (rows=575995635 width=233) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_cdemo_sk","ss_addr_sk","ss_store_sk","ss_quantity","ss_sales_price","ss_net_profit"] + <-Reducer 11 [BROADCAST_EDGE] vectorized + BROADCAST [RS_96] + Group By Operator [GBY_95] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=20000000)"] + <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_94] + Group By 
Operator [GBY_93] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=20000000)"] + Select Operator [SEL_92] (rows=20000000 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_90] + <-Reducer 4 [BROADCAST_EDGE] vectorized + BROADCAST [RS_98] + Group By Operator [GBY_97] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_80] + Group By Operator [GBY_79] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_78] (rows=652 width=4) +>>>>>>> d59ddb6e0a... cost model changes Output:["_col0"] Please refer to the previous Select Operator [SEL_75] <-Reducer 9 [BROADCAST_EDGE] vectorized +<<<<<<< HEAD BROADCAST [RS_89] Group By Operator [GBY_88] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] @@ -249,4 +343,16 @@ Stage-0 Select Operator [SEL_85] (rows=652 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_83] +======= + BROADCAST [RS_88] + Group By Operator [GBY_87] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_86] + Group By Operator [GBY_85] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_84] (rows=29552 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_82] +>>>>>>> d59ddb6e0a... 
cost model changes diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query64.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query64.q.out index aebd6b4137..ec13b058dc 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query64.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query64.q.out @@ -1,3 +1,10 @@ +<<<<<<< HEAD +======= +Warning: Shuffle Join MERGEJOIN[932][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[933][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 7' is a cross product +Warning: Shuffle Join MERGEJOIN[947][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 24' is a cross product +Warning: Shuffle Join MERGEJOIN[948][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5]] in Stage 'Reducer 25' is a cross product +>>>>>>> d59ddb6e0a... cost model changes PREHOOK: query: explain with cs_ui as (select cs_item_sk @@ -265,6 +272,7 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage +<<<<<<< HEAD Map 33 <- Reducer 29 (BROADCAST_EDGE), Reducer 36 (BROADCAST_EDGE), Reducer 42 (BROADCAST_EDGE) Map 39 <- Reducer 36 (BROADCAST_EDGE) Map 50 <- Reducer 12 (BROADCAST_EDGE), Reducer 32 (BROADCAST_EDGE), Reducer 38 (BROADCAST_EDGE), Reducer 46 (BROADCAST_EDGE) @@ -307,6 +315,54 @@ Reducer 6 <- Map 49 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 18 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 8 <- Map 49 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Reducer 9 <- Reducer 8 (SIMPLE_EDGE) +======= +Map 1 <- Reducer 38 (BROADCAST_EDGE), Reducer 44 (BROADCAST_EDGE), Reducer 52 (BROADCAST_EDGE) +Map 35 <- Reducer 52 (BROADCAST_EDGE) +Map 54 <- Reducer 42 (BROADCAST_EDGE), Reducer 45 (BROADCAST_EDGE), Reducer 52 (BROADCAST_EDGE), Reducer 53 (BROADCAST_EDGE) +Map 55 <- Reducer 52 (BROADCAST_EDGE), Reducer 53 (BROADCAST_EDGE) +Reducer 10 <- Map 48 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Map 49 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Map 49 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 13 <- Map 50 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 14 <- Map 51 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) +Reducer 15 <- Map 50 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) +Reducer 16 <- Reducer 15 (SIMPLE_EDGE) +Reducer 17 <- Reducer 16 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE) +Reducer 18 <- Reducer 17 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 19 (SIMPLE_EDGE) +Reducer 20 <- Map 19 (SIMPLE_EDGE), Map 54 (SIMPLE_EDGE) +Reducer 21 <- Reducer 20 (SIMPLE_EDGE), Reducer 41 (ONE_TO_ONE_EDGE) +Reducer 22 <- Map 43 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) +Reducer 23 <- Map 46 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) +Reducer 24 <- Map 43 (CUSTOM_SIMPLE_EDGE), Reducer 23 (CUSTOM_SIMPLE_EDGE) +Reducer 25 <- Map 43 (CUSTOM_SIMPLE_EDGE), Reducer 24 (CUSTOM_SIMPLE_EDGE) +Reducer 26 <- Map 47 (SIMPLE_EDGE), Reducer 25 (SIMPLE_EDGE) +Reducer 27 <- Map 48 (SIMPLE_EDGE), Reducer 26 (SIMPLE_EDGE) +Reducer 28 <- 
Map 48 (SIMPLE_EDGE), Reducer 27 (SIMPLE_EDGE) +Reducer 29 <- Map 49 (SIMPLE_EDGE), Reducer 28 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 37 (ONE_TO_ONE_EDGE) +Reducer 30 <- Map 49 (SIMPLE_EDGE), Reducer 29 (SIMPLE_EDGE) +Reducer 31 <- Map 50 (SIMPLE_EDGE), Reducer 30 (SIMPLE_EDGE) +Reducer 32 <- Map 51 (SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE) +Reducer 33 <- Map 50 (SIMPLE_EDGE), Reducer 32 (SIMPLE_EDGE) +Reducer 34 <- Reducer 33 (SIMPLE_EDGE) +Reducer 36 <- Map 35 (SIMPLE_EDGE), Map 39 (SIMPLE_EDGE) +Reducer 37 <- Reducer 36 (SIMPLE_EDGE) +Reducer 38 <- Reducer 37 (CUSTOM_SIMPLE_EDGE) +Reducer 4 <- Map 43 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 40 <- Map 39 (SIMPLE_EDGE), Map 55 (SIMPLE_EDGE) +Reducer 41 <- Reducer 40 (SIMPLE_EDGE) +Reducer 42 <- Reducer 41 (CUSTOM_SIMPLE_EDGE) +Reducer 44 <- Map 43 (CUSTOM_SIMPLE_EDGE) +Reducer 45 <- Map 43 (CUSTOM_SIMPLE_EDGE) +Reducer 5 <- Map 46 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 52 <- Map 51 (CUSTOM_SIMPLE_EDGE) +Reducer 53 <- Map 51 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Map 43 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Map 43 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Map 47 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Map 48 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +>>>>>>> d59ddb6e0a... 
cost model changes Stage-0 Fetch Operator @@ -334,6 +390,7 @@ Stage-0 <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_94] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 +<<<<<<< HEAD Group By Operator [GBY_93] (rows=2299138 width=1362) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count()","sum(_col37)","sum(_col38)","sum(_col39)"],keys:_col26, _col40, _col27, _col7, _col8, _col9, _col10, _col13, _col15, _col21, _col22, _col23, _col24, _col41 Select Operator [SEL_92] (rows=2331650 width=1292) @@ -344,9 +401,30 @@ Stage-0 Conds:RS_88._col32=RS_926._col0(Inner),Output:["_col7","_col8","_col9","_col10","_col13","_col15","_col17","_col21","_col22","_col23","_col24","_col26","_col27","_col37","_col38","_col39","_col40","_col41","_col45"] <-Map 49 [SIMPLE_EDGE] vectorized SHUFFLE [RS_926] +======= + Group By Operator [GBY_97] (rows=1914456248823429 width=1362) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count()","sum(_col1)","sum(_col2)","sum(_col3)"],keys:_col6, _col13, _col7, _col4, _col5, _col9, _col10, _col11, _col12, _col14, _col16, _col17, _col18, _col19 + Merge Join Operator [MERGEJOIN_941] (rows=1914456248823429 width=1353) + Conds:RS_93._col8=RS_1037._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col12","_col13","_col14","_col16","_col17","_col18","_col19"] + <-Map 50 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1037] + PartitionCols:_col0 + Select Operator [SEL_1035] (rows=40000000 width=365) + Output:["_col0","_col1","_col2","_col3","_col4"] + TableScan [TS_76] (rows=40000000 width=365) + 
default@customer_address,ad1,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_street_number","ca_street_name","ca_city","ca_zip"] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_93] + PartitionCols:_col8 + Merge Join Operator [MERGEJOIN_940] (rows=1914456248823429 width=996) + Conds:RS_90._col0=RS_960._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] + <-Map 51 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_960] +>>>>>>> d59ddb6e0a... cost model changes PartitionCols:_col0 Select Operator [SEL_925] (rows=1861800 width=89) Output:["_col0","_col1"] +<<<<<<< HEAD TableScan [TS_68] (rows=1861800 width=89) default@customer_demographics,cd1,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status"] <-Reducer 7 [SIMPLE_EDGE] @@ -576,6 +654,233 @@ Stage-0 Please refer to the previous Select Operator [SEL_952] <-Reducer 15 [SIMPLE_EDGE] vectorized SHUFFLE [RS_999] +======= + Filter Operator [FIL_958] (rows=518 width=312) + predicate:((i_color) IN ('maroon', 'burnished', 'dim', 'steel', 'navajo', 'chocolate') and i_current_price BETWEEN 35 AND 45 and i_current_price BETWEEN 36 AND 50) + TableScan [TS_85] (rows=462000 width=311) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_current_price","i_color","i_product_name"] + <-Reducer 13 [SIMPLE_EDGE] + SHUFFLE [RS_90] + PartitionCols:_col0 + Select Operator [SEL_84] (rows=177398042779540896 width=51) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] + Merge Join Operator [MERGEJOIN_939] (rows=177398042779540896 width=51) + Conds:RS_81._col1=RS_1036._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col10","_col13","_col14","_col15","_col16"] + <-Map 50 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1036] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1035] + <-Reducer 12 [SIMPLE_EDGE] 
+ SHUFFLE [RS_81] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_938] (rows=177398042779540896 width=51) + Conds:RS_78._col9=RS_1032._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col10"] + <-Map 49 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1032] + PartitionCols:_col0 + Select Operator [SEL_1030] (rows=7200 width=4) + Output:["_col0"] + Filter Operator [FIL_1029] (rows=7200 width=8) + predicate:hd_income_band_sk is not null + TableScan [TS_62] (rows=7200 width=8) + default@household_demographics,hd1,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_income_band_sk"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_78] + PartitionCols:_col9 + Select Operator [SEL_72] (rows=177398042779540896 width=51) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] + Merge Join Operator [MERGEJOIN_937] (rows=177398042779540896 width=51) + Conds:RS_69._col1=RS_1031._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col11","_col12"] + <-Map 49 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1031] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1030] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_69] + PartitionCols:_col1 + Filter Operator [FIL_68] (rows=177398042779540896 width=51) + predicate:(_col13 <> _col15) + Merge Join Operator [MERGEJOIN_936] (rows=177398042779540896 width=51) + Conds:RS_65._col10=RS_1026._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col11","_col12","_col13","_col15"] + <-Map 48 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1026] + PartitionCols:_col0 + Select Operator [SEL_1024] (rows=1861800 width=89) + Output:["_col0","_col1"] + TableScan [TS_51] (rows=1861800 width=89) + default@customer_demographics,cd1,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_65] + PartitionCols:_col10 + Select 
Operator [SEL_59] (rows=174924398677811552 width=52) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + Merge Join Operator [MERGEJOIN_935] (rows=174924398677811552 width=52) + Conds:RS_56._col2=RS_1025._col0(Inner),Output:["_col0","_col3","_col4","_col5","_col6","_col7","_col9","_col11","_col12","_col13","_col15","_col16","_col17","_col21"] + <-Map 48 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1025] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1024] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_56] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_934] (rows=172485247150217472 width=53) + Conds:RS_53._col1, _col10, _col8=RS_1022._col0, _col4, _col5(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col11","_col12","_col13","_col15","_col16","_col17"] + <-Map 47 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1022] + PartitionCols:_col0, _col4, _col5 + Select Operator [SEL_1021] (rows=69376329 width=23) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_1020] (rows=69376329 width=23) + predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_current_hdemo_sk is not null and c_first_sales_date_sk is not null and c_first_shipto_date_sk is not null) + TableScan [TS_48] (rows=80000000 width=23) + default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_hdemo_sk","c_current_addr_sk","c_first_shipto_date_sk","c_first_sales_date_sk"] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_53] + PartitionCols:_col1, _col10, _col8 + Select Operator [SEL_47] (rows=172485247150217474 width=53) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + Merge Join Operator [MERGEJOIN_933] (rows=172485247150217474 width=53) + 
Conds:(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col7","_col8","_col9","_col12","_col13","_col14","_col15","_col16","_col17"] + <-Map 43 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_996] + Select Operator [SEL_990] (rows=73049 width=8) + Output:["_col0","_col1"] + TableScan [TS_25] (rows=73049 width=8) + default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_44] + Merge Join Operator [MERGEJOIN_932] (rows=2361226671826 width=540) + Conds:(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col7","_col8","_col9","_col12","_col13","_col14","_col15"] + <-Map 43 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_995] + Select Operator [SEL_989] (rows=73049 width=8) + Output:["_col0","_col1"] + Please refer to the previous TableScan [TS_25] + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_41] + Merge Join Operator [MERGEJOIN_931] (rows=32323874 width=328) + Conds:RS_38._col6=RS_1018._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col7","_col8","_col9","_col12","_col13"] + <-Map 46 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1018] + PartitionCols:_col0 + Select Operator [SEL_1017] (rows=1704 width=181) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_1016] (rows=1704 width=181) + predicate:(s_store_name is not null and s_zip is not null) + TableScan [TS_28] (rows=1704 width=181) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_zip"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_38] + PartitionCols:_col6 + Merge Join Operator [MERGEJOIN_930] (rows=32323874 width=153) + Conds:RS_35._col0=RS_1000._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] + <-Map 43 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1000] + PartitionCols:_col0 + Select Operator [SEL_994] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_988] 
(rows=652 width=8) + predicate:(d_year = 2000) + Please refer to the previous TableScan [TS_25] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_35] + PartitionCols:_col0 + Select Operator [SEL_24] (rows=90527915 width=289) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] + Merge Join Operator [MERGEJOIN_929] (rows=90527915 width=289) + Conds:RS_21._col1=RS_982._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col8","_col9","_col10"] + <-Reducer 37 [ONE_TO_ONE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_982] + PartitionCols:_col0 + Select Operator [SEL_981] (rows=13257 width=4) + Output:["_col0"] + Filter Operator [FIL_980] (rows=13257 width=228) + predicate:(_col1 > (2 * _col2)) + Group By Operator [GBY_979] (rows=39773 width=228) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 + <-Reducer 36 [SIMPLE_EDGE] + SHUFFLE [RS_14] + PartitionCols:_col0 + Group By Operator [GBY_13] (rows=6482999 width=228) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","sum(_col5)"],keys:_col0 + Merge Join Operator [MERGEJOIN_928] (rows=183085709 width=227) + Conds:RS_975._col0, _col1=RS_977._col0, _col1(Inner),Output:["_col0","_col2","_col5"] + <-Map 39 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_977] + PartitionCols:_col0, _col1 + Select Operator [SEL_976] (rows=28798881 width=120) + Output:["_col0","_col1","_col2"] + TableScan [TS_7] (rows=28798881 width=337) + default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number","cr_refunded_cash","cr_reversed_charge","cr_store_credit"] + <-Map 35 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_975] + PartitionCols:_col0, _col1 + Select Operator [SEL_974] (rows=287989836 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_973] (rows=287989836 width=119) + predicate:(cs_item_sk BETWEEN DynamicValue(RS_91_item_i_item_sk_min) AND 
DynamicValue(RS_91_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_91_item_i_item_sk_bloom_filter))) + TableScan [TS_5] (rows=287989836 width=119) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_item_sk","cs_order_number","cs_ext_list_price"] + <-Reducer 52 [BROADCAST_EDGE] vectorized + BROADCAST [RS_970] + Group By Operator [GBY_968] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 51 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_966] + Group By Operator [GBY_964] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_961] (rows=518 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_959] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_927] (rows=427216373 width=348) + Conds:RS_1012._col1, _col7=RS_1014._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col8","_col9","_col10"] + <-Map 19 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1014] + PartitionCols:_col0, _col1 + Select Operator [SEL_1013] (rows=57591150 width=8) + Output:["_col0","_col1"] + TableScan [TS_3] (rows=57591150 width=8) + default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_ticket_number"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1012] + PartitionCols:_col1, _col7 + Select Operator [SEL_1011] (rows=417313408 width=351) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] + Filter Operator [FIL_1010] (rows=417313408 width=355) + predicate:((ss_item_sk BETWEEN DynamicValue(RS_22_catalog_sales_cs_item_sk_min) AND DynamicValue(RS_22_catalog_sales_cs_item_sk_max) and in_bloom_filter(ss_item_sk, 
DynamicValue(RS_22_catalog_sales_cs_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_91_item_i_item_sk_min) AND DynamicValue(RS_91_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_91_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_36_d1_d_date_sk_min) AND DynamicValue(RS_36_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_36_d1_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_0] (rows=575995635 width=355) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_promo_sk","ss_ticket_number","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] + <-Reducer 52 [BROADCAST_EDGE] vectorized + BROADCAST [RS_969] + Please refer to the previous Group By Operator [GBY_968] + <-Reducer 38 [BROADCAST_EDGE] vectorized + BROADCAST [RS_987] + Group By Operator [GBY_986] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 37 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_985] + Group By Operator [GBY_984] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_983] (rows=13257 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_981] + <-Reducer 44 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1009] + Group By Operator [GBY_1008] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 43 [CUSTOM_SIMPLE_EDGE] vectorized + 
PARTITION_ONLY_SHUFFLE [RS_1006] + Group By Operator [GBY_1004] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1001] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_994] + <-Reducer 34 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1065] +>>>>>>> d59ddb6e0a... cost model changes PartitionCols:_col1, _col0, _col2 Select Operator [SEL_998] (rows=2299138 width=525) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] @@ -584,6 +889,7 @@ Stage-0 <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_191] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 +<<<<<<< HEAD Group By Operator [GBY_190] (rows=2299138 width=1362) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count()","sum(_col37)","sum(_col38)","sum(_col39)"],keys:_col26, _col40, _col27, _col7, _col8, _col9, _col10, _col13, _col15, _col21, _col22, _col23, _col24, _col41 Select Operator [SEL_189] (rows=2331650 width=1292) @@ -755,4 +1061,223 @@ Stage-0 Select Operator [SEL_987] (rows=13257 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_985] +======= + Group By Operator [GBY_198] (rows=1914456248823429 width=1362) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count()","sum(_col1)","sum(_col2)","sum(_col3)"],keys:_col6, _col13, _col7, _col4, _col5, _col9, _col10, _col11, _col12, _col14, _col16, _col17, _col18, _col19 + Merge Join Operator [MERGEJOIN_956] (rows=1914456248823429 width=1353) + 
Conds:RS_194._col8=RS_1039._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col12","_col13","_col14","_col16","_col17","_col18","_col19"] + <-Map 50 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1039] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1035] + <-Reducer 32 [SIMPLE_EDGE] + SHUFFLE [RS_194] + PartitionCols:_col8 + Merge Join Operator [MERGEJOIN_955] (rows=1914456248823429 width=996) + Conds:RS_191._col0=RS_962._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] + <-Map 51 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_962] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_959] + <-Reducer 31 [SIMPLE_EDGE] + SHUFFLE [RS_191] + PartitionCols:_col0 + Select Operator [SEL_185] (rows=177398042779540896 width=51) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] + Merge Join Operator [MERGEJOIN_954] (rows=177398042779540896 width=51) + Conds:RS_182._col1=RS_1038._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col10","_col13","_col14","_col15","_col16"] + <-Map 50 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1038] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1035] + <-Reducer 30 [SIMPLE_EDGE] + SHUFFLE [RS_182] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_953] (rows=177398042779540896 width=51) + Conds:RS_179._col9=RS_1034._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col10"] + <-Map 49 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1034] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1030] + <-Reducer 29 [SIMPLE_EDGE] + SHUFFLE [RS_179] + PartitionCols:_col9 + Select Operator [SEL_173] (rows=177398042779540896 width=51) + 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] + Merge Join Operator [MERGEJOIN_952] (rows=177398042779540896 width=51) + Conds:RS_170._col1=RS_1033._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col11","_col12"] + <-Map 49 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1033] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1030] + <-Reducer 28 [SIMPLE_EDGE] + SHUFFLE [RS_170] + PartitionCols:_col1 + Filter Operator [FIL_169] (rows=177398042779540896 width=51) + predicate:(_col13 <> _col15) + Merge Join Operator [MERGEJOIN_951] (rows=177398042779540896 width=51) + Conds:RS_166._col10=RS_1028._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col11","_col12","_col13","_col15"] + <-Map 48 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1028] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1024] + <-Reducer 27 [SIMPLE_EDGE] + SHUFFLE [RS_166] + PartitionCols:_col10 + Select Operator [SEL_160] (rows=174924398677811552 width=52) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + Merge Join Operator [MERGEJOIN_950] (rows=174924398677811552 width=52) + Conds:RS_157._col2=RS_1027._col0(Inner),Output:["_col0","_col3","_col4","_col5","_col6","_col7","_col9","_col11","_col12","_col13","_col15","_col16","_col17","_col21"] + <-Map 48 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1027] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1024] + <-Reducer 26 [SIMPLE_EDGE] + SHUFFLE [RS_157] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_949] (rows=172485247150217472 width=53) + Conds:RS_154._col1, _col10, _col8=RS_1023._col0, _col4, _col5(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col11","_col12","_col13","_col15","_col16","_col17"] + <-Map 47 [SIMPLE_EDGE] 
vectorized + SHUFFLE [RS_1023] + PartitionCols:_col0, _col4, _col5 + Please refer to the previous Select Operator [SEL_1021] + <-Reducer 25 [SIMPLE_EDGE] + SHUFFLE [RS_154] + PartitionCols:_col1, _col10, _col8 + Select Operator [SEL_148] (rows=172485247150217474 width=53) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + Merge Join Operator [MERGEJOIN_948] (rows=172485247150217474 width=53) + Conds:(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col7","_col8","_col9","_col12","_col13","_col14","_col15","_col16","_col17"] + <-Map 43 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_999] + Select Operator [SEL_993] (rows=73049 width=8) + Output:["_col0","_col1"] + Please refer to the previous TableScan [TS_25] + <-Reducer 24 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_145] + Merge Join Operator [MERGEJOIN_947] (rows=2361226671826 width=540) + Conds:(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col7","_col8","_col9","_col12","_col13","_col14","_col15"] + <-Map 43 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_998] + Select Operator [SEL_992] (rows=73049 width=8) + Output:["_col0","_col1"] + Please refer to the previous TableScan [TS_25] + <-Reducer 23 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_142] + Merge Join Operator [MERGEJOIN_946] (rows=32323874 width=328) + Conds:RS_139._col6=RS_1019._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col7","_col8","_col9","_col12","_col13"] + <-Map 46 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1019] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_1017] + <-Reducer 22 [SIMPLE_EDGE] + SHUFFLE [RS_139] + PartitionCols:_col6 + Merge Join Operator [MERGEJOIN_945] (rows=32323874 width=153) + Conds:RS_136._col0=RS_1002._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] + <-Map 43 [SIMPLE_EDGE] vectorized + 
PARTITION_ONLY_SHUFFLE [RS_1002] + PartitionCols:_col0 + Select Operator [SEL_997] (rows=652 width=4) + Output:["_col0"] + Filter Operator [FIL_991] (rows=652 width=8) + predicate:(d_year = 2001) + Please refer to the previous TableScan [TS_25] + <-Reducer 21 [SIMPLE_EDGE] + SHUFFLE [RS_136] + PartitionCols:_col0 + Select Operator [SEL_125] (rows=90527915 width=289) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] + Merge Join Operator [MERGEJOIN_944] (rows=90527915 width=289) + Conds:RS_122._col1=RS_1052._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col8","_col9","_col10"] + <-Reducer 41 [ONE_TO_ONE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1052] + PartitionCols:_col0 + Select Operator [SEL_1051] (rows=13257 width=4) + Output:["_col0"] + Filter Operator [FIL_1050] (rows=13257 width=228) + predicate:(_col1 > (2 * _col2)) + Group By Operator [GBY_1049] (rows=39773 width=228) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 + <-Reducer 40 [SIMPLE_EDGE] + SHUFFLE [RS_115] + PartitionCols:_col0 + Group By Operator [GBY_114] (rows=6482999 width=228) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","sum(_col5)"],keys:_col0 + Merge Join Operator [MERGEJOIN_943] (rows=183085709 width=227) + Conds:RS_1048._col0, _col1=RS_978._col0, _col1(Inner),Output:["_col0","_col2","_col5"] + <-Map 39 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_978] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_976] + <-Map 55 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1048] + PartitionCols:_col0, _col1 + Select Operator [SEL_1047] (rows=287989836 width=119) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_1046] (rows=287989836 width=119) + predicate:((cs_item_sk BETWEEN DynamicValue(RS_192_item_i_item_sk_min) AND DynamicValue(RS_192_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, 
DynamicValue(RS_192_item_i_item_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_91_item_i_item_sk_min) AND DynamicValue(RS_91_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_91_item_i_item_sk_bloom_filter)))) + TableScan [TS_106] (rows=287989836 width=119) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_item_sk","cs_order_number","cs_ext_list_price"] + <-Reducer 52 [BROADCAST_EDGE] vectorized + BROADCAST [RS_972] + Please refer to the previous Group By Operator [GBY_968] + <-Reducer 53 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1045] + Group By Operator [GBY_1043] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 51 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_967] + Group By Operator [GBY_965] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_963] (rows=518 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_959] + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_122] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_942] (rows=427216373 width=348) + Conds:RS_1062._col1, _col7=RS_1015._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col8","_col9","_col10"] + <-Map 19 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1015] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_1013] + <-Map 54 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1062] + PartitionCols:_col1, _col7 + Select Operator [SEL_1061] (rows=417313408 width=351) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] + Filter Operator [FIL_1060] (rows=417313408 width=355) + predicate:((ss_item_sk BETWEEN DynamicValue(RS_123_catalog_sales_cs_item_sk_min) AND 
DynamicValue(RS_123_catalog_sales_cs_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_123_catalog_sales_cs_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_192_item_i_item_sk_min) AND DynamicValue(RS_192_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_192_item_i_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_91_item_i_item_sk_min) AND DynamicValue(RS_91_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_91_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_137_d1_d_date_sk_min) AND DynamicValue(RS_137_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_137_d1_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_101] (rows=575995635 width=355) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_promo_sk","ss_ticket_number","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] + <-Reducer 52 [BROADCAST_EDGE] vectorized + BROADCAST [RS_971] + Please refer to the previous Group By Operator [GBY_968] + <-Reducer 53 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1044] + Please refer to the previous Group By Operator [GBY_1043] + <-Reducer 42 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1057] + Group By Operator [GBY_1056] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Reducer 41 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1055] + Group By Operator [GBY_1054] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator 
[SEL_1053] (rows=13257 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1051] + <-Reducer 45 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1059] + Group By Operator [GBY_1058] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 43 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1007] + Group By Operator [GBY_1005] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1003] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_997] +>>>>>>> d59ddb6e0a... cost model changes diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query72.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query72.q.out index f27fbc9273..0690bb5dcc 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/query72.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query72.q.out @@ -81,6 +81,7 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage +<<<<<<< HEAD Map 9 <- Reducer 17 (BROADCAST_EDGE), Reducer 19 (BROADCAST_EDGE), Reducer 21 (BROADCAST_EDGE) Reducer 10 <- Map 16 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) Reducer 11 <- Map 18 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) @@ -97,11 +98,30 @@ Reducer 4 <- Map 25 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Map 26 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +======= +Map 15 <- Reducer 17 (BROADCAST_EDGE), Reducer 20 (BROADCAST_EDGE), Reducer 22 (BROADCAST_EDGE) +Reducer 10 <- Map 25 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Map 26 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (SIMPLE_EDGE) +Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 14 (SIMPLE_EDGE) +Reducer 20 <- Map 19 (CUSTOM_SIMPLE_EDGE) +Reducer 22 <- Map 21 (CUSTOM_SIMPLE_EDGE) +Reducer 3 <- Map 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 16 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 18 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Map 19 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Map 21 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) +Reducer 8 <- Map 23 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Map 24 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +>>>>>>> d59ddb6e0a... 
cost model changes Stage-0 Fetch Operator limit:100 Stage-1 +<<<<<<< HEAD Reducer 7 vectorized File Output Operator [FS_293] Limit [LIM_292] (rows=100 width=312) @@ -294,4 +314,202 @@ Stage-0 Output:["_col0","_col1"] TableScan [TS_2] (rows=27 width=104) default@warehouse,warehouse,Tbl:COMPLETE,Col:COMPLETE,Output:["w_warehouse_sk","w_warehouse_name"] +======= + Reducer 13 vectorized + File Output Operator [FS_298] + Limit [LIM_297] (rows=100 width=312) + Number of rows:100 + Select Operator [SEL_296] (rows=3148061 width=312) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + <-Reducer 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_295] + Group By Operator [GBY_294] (rows=3148061 width=312) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_66] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_65] (rows=558148039 width=312) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(_col5)","count(_col6)","count()"],keys:_col3, _col2, _col4 + Select Operator [SEL_63] (rows=558148039 width=292) + Output:["_col2","_col3","_col4","_col5","_col6"] + Merge Join Operator [MERGEJOIN_251] (rows=558148039 width=292) + Conds:RS_60._col0, _col2=RS_293._col0, _col1(Left Outer),Output:["_col3","_col4","_col5","_col6"] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_293] + PartitionCols:_col0, _col1 + Select Operator [SEL_292] (rows=28798881 width=8) + Output:["_col0","_col1"] + TableScan [TS_55] (rows=28798881 width=8) + default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_60] + PartitionCols:_col0, _col2 + Merge Join Operator [MERGEJOIN_250] (rows=216421239 width=300) + Conds:RS_57._col1=RS_291._col0(Left Outer),Output:["_col0","_col2","_col3","_col4","_col5","_col6"] + <-Map 25 
[SIMPLE_EDGE] vectorized + SHUFFLE [RS_291] + PartitionCols:_col0 + Select Operator [SEL_290] (rows=2300 width=4) + Output:["_col0"] + TableScan [TS_53] (rows=2300 width=4) + default@promotion,promotion,Tbl:COMPLETE,Col:COMPLETE,Output:["p_promo_sk"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_57] + PartitionCols:_col1 + Select Operator [SEL_52] (rows=216421239 width=299) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_51] (rows=216421239 width=315) + predicate:(_col12 > _col8) + Merge Join Operator [MERGEJOIN_249] (rows=649263719 width=315) + Conds:RS_48._col0=RS_289._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7","_col8","_col12"] + <-Map 24 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_289] + PartitionCols:_col0 + Select Operator [SEL_288] (rows=73049 width=12) + Output:["_col0","_col1"] + TableScan [TS_43] (rows=73049 width=98) + default@date_dim,d3,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_48] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_248] (rows=649263719 width=311) + Conds:RS_45._col4, _col7=RS_287._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col7","_col8"] + <-Map 23 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_287] + PartitionCols:_col0, _col1 + Select Operator [SEL_286] (rows=73049 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_285] (rows=73049 width=8) + predicate:d_week_seq is not null + TableScan [TS_40] (rows=73049 width=8) + default@date_dim,d2,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_week_seq"] + <-Reducer 7 [SIMPLE_EDGE] + SHUFFLE [RS_45] + PartitionCols:_col4, _col7 + Select Operator [SEL_39] (rows=580674579 width=315) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Merge Join Operator [MERGEJOIN_247] (rows=580674579 width=315) + Conds:RS_36._col2=RS_274._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col10","_col11"] + 
<-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_274] + PartitionCols:_col0 + Select Operator [SEL_273] (rows=1440 width=4) + Output:["_col0"] + Filter Operator [FIL_272] (rows=1440 width=96) + predicate:(hd_buy_potential = '1001-5000') + TableScan [TS_30] (rows=7200 width=96) + default@household_demographics,household_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["hd_demo_sk","hd_buy_potential"] + <-Reducer 6 [SIMPLE_EDGE] + SHUFFLE [RS_36] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_246] (rows=2903372850 width=319) + Conds:RS_33._col0=RS_266._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col10","_col11"] + <-Map 19 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_266] + PartitionCols:_col0 + Select Operator [SEL_265] (rows=652 width=16) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_264] (rows=652 width=106) + predicate:((d_year = 2001) and d_week_seq is not null) + TableScan [TS_27] (rows=73049 width=106) + default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date","d_week_seq","d_year"] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_33] + PartitionCols:_col0 + Select Operator [SEL_26] (rows=8189226191 width=311) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Merge Join Operator [MERGEJOIN_245] (rows=8189226191 width=311) + Conds:RS_23._col4=RS_284._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col11"] + <-Map 18 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_284] + PartitionCols:_col0 + Select Operator [SEL_283] (rows=462000 width=188) + Output:["_col0","_col1"] + TableScan [TS_18] (rows=462000 width=188) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_desc"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_23] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_244] (rows=8189226191 width=127) + Conds:RS_20._col2=RS_258._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7","_col8"] + 
<-Map 16 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_258] + PartitionCols:_col0 + Select Operator [SEL_257] (rows=265971 width=4) + Output:["_col0"] + Filter Operator [FIL_256] (rows=265971 width=89) + predicate:(cd_marital_status = 'M') + TableScan [TS_15] (rows=1861800 width=89) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_marital_status"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_20] + PartitionCols:_col2 + Select Operator [SEL_14] (rows=56525338557 width=131) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Filter Operator [FIL_13] (rows=56525338557 width=139) + predicate:(_col3 < _col13) + Merge Join Operator [MERGEJOIN_243] (rows=169576015673 width=139) + Conds:RS_10._col1=RS_282._col4(Inner),Output:["_col0","_col3","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_282] + PartitionCols:_col4 + Select Operator [SEL_281] (rows=282274763 width=31) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Filter Operator [FIL_280] (rows=282274763 width=31) + predicate:((cs_bill_cdemo_sk BETWEEN DynamicValue(RS_21_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_21_customer_demographics_cd_demo_sk_max) and in_bloom_filter(cs_bill_cdemo_sk, DynamicValue(RS_21_customer_demographics_cd_demo_sk_bloom_filter))) and (cs_bill_hdemo_sk BETWEEN DynamicValue(RS_37_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_37_household_demographics_hd_demo_sk_max) and in_bloom_filter(cs_bill_hdemo_sk, DynamicValue(RS_37_household_demographics_hd_demo_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_34_d1_d_date_sk_min) AND DynamicValue(RS_34_d1_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_34_d1_d_date_sk_bloom_filter))) and cs_bill_cdemo_sk is not null and cs_bill_hdemo_sk is not null and cs_ship_date_sk is not null and cs_sold_date_sk is not 
null) + TableScan [TS_4] (rows=287989836 width=31) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_ship_date_sk","cs_bill_cdemo_sk","cs_bill_hdemo_sk","cs_item_sk","cs_promo_sk","cs_order_number","cs_quantity"] + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_263] + Group By Operator [GBY_262] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_261] + Group By Operator [GBY_260] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_259] (rows=265971 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_257] + <-Reducer 20 [BROADCAST_EDGE] vectorized + BROADCAST [RS_271] + Group By Operator [GBY_270] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 19 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_269] + Group By Operator [GBY_268] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_267] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_265] + <-Reducer 22 [BROADCAST_EDGE] vectorized + BROADCAST [RS_279] + Group By Operator [GBY_278] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_277] + Group By Operator [GBY_276] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_275] 
(rows=1440 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_273] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_10] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_242] (rows=37584000 width=111) + Conds:RS_253._col2=RS_255._col0(Inner),Output:["_col0","_col1","_col3","_col5"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_253] + PartitionCols:_col2 + Select Operator [SEL_252] (rows=37584000 width=15) + Output:["_col0","_col1","_col2","_col3"] + TableScan [TS_0] (rows=37584000 width=15) + default@inventory,inventory,Tbl:COMPLETE,Col:COMPLETE,Output:["inv_date_sk","inv_item_sk","inv_warehouse_sk","inv_quantity_on_hand"] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_255] + PartitionCols:_col0 + Select Operator [SEL_254] (rows=27 width=104) + Output:["_col0","_col1"] + TableScan [TS_2] (rows=27 width=104) + default@warehouse,warehouse,Tbl:COMPLETE,Col:COMPLETE,Output:["w_warehouse_sk","w_warehouse_name"] +>>>>>>> d59ddb6e0a... cost model changes