diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 6ea68c3500..41fae36144 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -3752,6 +3752,10 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "When dynamic pruning is enabled, joins on partition keys will be processed by sending\n" + "events from the processing vertices to the Tez application master. These events will be\n" + "used to prune unnecessary partitions."), + TEZ_DYNAMIC_PARTITION_PRUNING_EXTENDED("hive.tez.dynamic.partition.pruning.extended", true, + "Whether we should try to create additional opportunities for dynamic pruning, e.g., considering\n" + + "siblings that may not be created by normal dynamic pruning logic.\n" + + "Only works when dynamic pruning is enabled."), TEZ_DYNAMIC_PARTITION_PRUNING_MAX_EVENT_SIZE("hive.tez.dynamic.partition.pruning.max.event.size", 1*1024*1024L, "Maximum size of events sent by processors in dynamic pruning. 
If this size is crossed no pruning will take place."), diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 4001b9f452..d08528f319 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -894,6 +894,7 @@ minillaplocal.query.files=\ unionDistinct_3.q,\ vectorized_join46.q,\ vectorized_multi_output_select.q,\ + dynamic_semijoin_reduction_sw2.q,\ partialdhj.q,\ stats_date.q,\ dst.q diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java index 119aa925c1..9bce43ca58 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java @@ -169,44 +169,7 @@ protected void optimizeOperatorPlan(ParseContext pCtx, Set inputs, } perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run reduce sink after join algorithm selection"); - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); - runRemoveDynamicPruningOptimization(procCtx, inputs, outputs); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run remove dynamic pruning by size"); - - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); - markSemiJoinForDPP(procCtx); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Mark certain semijoin edges important based "); - - // Removing semijoin optimization when it may not be beneficial - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); - removeSemijoinOptimizationByBenefit(procCtx); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove Semijoins based on cost benefits"); - - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); - // Remove any parallel edge between semijoin 
and mapjoin. - removeSemijoinsParallelToMapJoin(procCtx); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run the optimizations that use stats for optimization"); - - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); - // Remove semijoin optimization if it creates a cycle with mapside joins - removeSemiJoinCyclesDueToMapsideJoins(procCtx); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove semijoin optimizations if it creates a cycle with mapside join"); - - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); - // Remove semijoin optimization if SMB join is created. - removeSemijoinOptimizationFromSMBJoins(procCtx); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove semijoin optimizations if needed"); - - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); - // Remove bloomfilter if no stats generated - removeSemiJoinIfNoStats(procCtx); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove bloom filter optimizations if needed"); - - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); - // after the stats phase we might have some cyclic dependencies that we need - // to take care of. 
- runCycleAnalysisForPartitionPruning(procCtx, inputs, outputs); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run cycle analysis for partition pruning"); + semijoinRemovalBasedTransformations(procCtx, inputs, outputs); perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); if(procCtx.conf.getBoolVar(ConfVars.HIVE_SHARED_WORK_OPTIMIZATION)) { @@ -230,11 +193,6 @@ protected void optimizeOperatorPlan(ParseContext pCtx, Set inputs, private void runCycleAnalysisForPartitionPruning(OptimizeTezProcContext procCtx, Set inputs, Set outputs) throws SemanticException { - - if (!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING)) { - return; - } - boolean cycleFree = false; while (!cycleFree) { cycleFree = true; @@ -454,6 +412,80 @@ private void runStatsDependentOptimizations(OptimizeTezProcContext procCtx, ogw.startWalking(topNodes, null); } + private void semijoinRemovalBasedTransformations(OptimizeTezProcContext procCtx, + Set inputs, Set outputs) throws SemanticException { /* Runs the dynamic-pruning and semijoin removal passes in a fixed order. Every pass keeps its own PerfLogger begin/end pair, and the configuration checks that the individual passes used to perform are centralized in the three flags computed below. */ + PerfLogger perfLogger = SessionState.getPerfLogger(); + + final boolean dynamicPartitionPruningEnabled = + procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING); + final boolean semiJoinReductionEnabled = dynamicPartitionPruningEnabled && + procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION); /* NOTE(review): the removed per-pass guards tested only TEZ_DYNAMIC_SEMIJOIN_REDUCTION; here the semijoin passes additionally require TEZ_DYNAMIC_PARTITION_PRUNING - confirm that is intended. */ + final boolean extendedReductionEnabled = dynamicPartitionPruningEnabled && + procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING_EXTENDED); + + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); + if (dynamicPartitionPruningEnabled) { + runRemoveDynamicPruningOptimization(procCtx, inputs, outputs); + } + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run remove dynamic pruning by size"); + + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); + if (semiJoinReductionEnabled) { + markSemiJoinForDPP(procCtx); + } + 
perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Mark certain semijoin edges important based "); /* NOTE(review): this timer label ends mid-phrase; it is copied verbatim from the removed call site. */ + + // Removing semijoin optimization when it may not be beneficial + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); + if (semiJoinReductionEnabled) { + removeSemijoinOptimizationByBenefit(procCtx); + } + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove Semijoins based on cost benefits"); + + // Remove any parallel edge between semijoin and mapjoin. + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); + if (semiJoinReductionEnabled) { + removeSemijoinsParallelToMapJoin(procCtx); + } + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove any parallel edge between semijoin and mapjoin"); + + // Remove semijoin optimization if it creates a cycle with mapside joins + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); + if (semiJoinReductionEnabled && procCtx.parseContext.getRsToSemiJoinBranchInfo().size() != 0) { + removeSemiJoinCyclesDueToMapsideJoins(procCtx); + } + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove semijoin optimizations if it creates a cycle with mapside join"); + + // Remove semijoin optimization if SMB join is created. 
+ perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); + if (semiJoinReductionEnabled && procCtx.parseContext.getRsToSemiJoinBranchInfo().size() != 0) { /* The non-empty semijoin map check mirrors the early return removed from the callee. */ + removeSemijoinOptimizationFromSMBJoins(procCtx); + } + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove semijoin optimizations if needed"); + + // Remove bloomfilter if no stats generated + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); + if (semiJoinReductionEnabled && procCtx.parseContext.getRsToSemiJoinBranchInfo().size() != 0) { + removeSemiJoinIfNoStats(procCtx); + } + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove bloom filter optimizations if needed"); + + // after the stats phase we might have some cyclic dependencies that we need + // to take care of. + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); + if (dynamicPartitionPruningEnabled) { + runCycleAnalysisForPartitionPruning(procCtx, inputs, outputs); + } + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run cycle analysis for partition pruning"); + + // remove redundant dpp and semijoins + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); + if (extendedReductionEnabled) { /* Runs last, after cycle analysis, and only when the extended pruning flag is on. */ + removeRedundantSemijoinAndDpp(procCtx); + } + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Remove redundant semijoin reduction"); + } + private void runRemoveDynamicPruningOptimization(OptimizeTezProcContext procCtx, Set inputs, Set outputs) throws SemanticException { @@ -739,11 +771,6 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, private static void removeSemijoinOptimizationFromSMBJoins( OptimizeTezProcContext procCtx) throws SemanticException { - if (!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION) || - procCtx.parseContext.getRsToSemiJoinBranchInfo().size() == 0) { - return; - } - Map opRules = new 
LinkedHashMap(); opRules.put( new RuleRegExp("R1", TableScanOperator.getOperatorName() + "%" + @@ -825,11 +852,6 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, private static void removeSemiJoinCyclesDueToMapsideJoins( OptimizeTezProcContext procCtx) throws SemanticException { - if (!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION) || - procCtx.parseContext.getRsToSemiJoinBranchInfo().size() == 0) { - return; - } - Map opRules = new LinkedHashMap(); opRules.put( new RuleRegExp("R1", MapJoinOperator.getOperatorName() + "%" + @@ -914,98 +936,15 @@ private static void removeSemiJoinCyclesDueToMapsideJoins( } } - private static class SemiJoinRemovalIfNoStatsProc implements NodeProcessor { - - @Override - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... nodeOutputs) throws SemanticException { - assert nd instanceof ReduceSinkOperator; - ReduceSinkOperator rs = (ReduceSinkOperator) nd; - ParseContext pCtx = ((OptimizeTezProcContext) procCtx).parseContext; - SemiJoinBranchInfo sjInfo = pCtx.getRsToSemiJoinBranchInfo().get(rs); - if (sjInfo == null) { - // nothing to do here. - return null; - } - - // This is a semijoin branch. 
The stack should look like, - // -SEL-GB1-RS1-GB2-RS2 - GroupByOperator gbOp = (GroupByOperator) (stack.get(stack.size() - 2)); - GroupByDesc gbDesc = gbOp.getConf(); - ArrayList aggregationDescs = gbDesc.getAggregators(); - for (AggregationDesc agg : aggregationDescs) { - if (!"bloom_filter".equals(agg.getGenericUDAFName())) { - continue; - } - - GenericUDAFBloomFilterEvaluator udafBloomFilterEvaluator = - (GenericUDAFBloomFilterEvaluator) agg.getGenericUDAFEvaluator(); - if (udafBloomFilterEvaluator.hasHintEntries()) - { - return null; // Created using hint, skip it - } - - long expectedEntries = udafBloomFilterEvaluator.getExpectedEntries(); - if (expectedEntries == -1 || expectedEntries > - pCtx.getConf().getLongVar(ConfVars.TEZ_MAX_BLOOM_FILTER_ENTRIES)) { - if (sjInfo.getIsHint()) { - throw new SemanticException("Removing hinted semijoin due to lack to stats" + - " or exceeding max bloom filter entries"); - } - // Remove the semijoin optimization branch along with ALL the mappings - // The parent GB2 has all the branches. Collect them and remove them. - for (Node node : gbOp.getChildren()) { - ReduceSinkOperator rsFinal = (ReduceSinkOperator) node; - TableScanOperator ts = pCtx.getRsToSemiJoinBranchInfo(). - get(rsFinal).getTsOp(); - if (LOG.isDebugEnabled()) { - LOG.debug("expectedEntries=" + expectedEntries + ". " - + "Either stats unavailable or expectedEntries exceeded max allowable bloomfilter size. " - + "Removing semijoin " - + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(ts)); - } - GenTezUtils.removeBranch(rsFinal); - GenTezUtils.removeSemiJoinOperator(pCtx, rsFinal, ts); - } - return null; - } - } - - // At this point, hinted semijoin case has been handled already - // Check if big table is big enough that runtime filtering is - // worth it. 
- TableScanOperator ts = sjInfo.getTsOp(); - if (ts.getStatistics() != null) { - long numRows = ts.getStatistics().getNumRows(); - if (numRows < pCtx.getConf().getLongVar(ConfVars.TEZ_BIGTABLE_MIN_SIZE_SEMIJOIN_REDUCTION)) { - if (sjInfo.getShouldRemove()) { - if (LOG.isDebugEnabled()) { - LOG.debug("Insufficient rows (" + numRows + ") to justify semijoin optimization. Removing semijoin " - + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(ts)); - } - GenTezUtils.removeBranch(rs); - GenTezUtils.removeSemiJoinOperator(pCtx, rs, ts); - } - } - } - return null; - } - } - private void removeSemiJoinIfNoStats(OptimizeTezProcContext procCtx) throws SemanticException { - if(!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION)) { - // Not needed without semi-join reduction - return; - } - Map opRules = new LinkedHashMap(); opRules.put( new RuleRegExp("R1", GroupByOperator.getOperatorName() + "%" + ReduceSinkOperator.getOperatorName() + "%" + GroupByOperator.getOperatorName() + "%" + ReduceSinkOperator.getOperatorName() + "%"), - new SemiJoinRemovalIfNoStatsProc()); + new SemiJoinRemovalProc(true, false)); Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx); List topNodes = new ArrayList(); topNodes.addAll(procCtx.parseContext.getTopOps().values()); @@ -1077,6 +1016,218 @@ private void markOperatorsWithUnstableRuntimeStats(OptimizeTezProcContext procCt GraphWalker ogw = new PreOrderOnceWalker(disp); ogw.startWalking(topNodes, null); } + + private class SemiJoinRemovalProc implements NodeProcessor { /* Node processor registered for the GB-RS-GB-RS semijoin branch pattern. With removeBasedOnStats it drops branches whose bloom filter has no usable entry estimate or exceeds the configured maximum, and branches whose target table has too few rows; with removeRedundant it records, in the walker context, branches that duplicate another semijoin on the same table scan and column. */ + + private final boolean removeBasedOnStats; + private final boolean removeRedundant; + + private SemiJoinRemovalProc (boolean removeBasedOnStats, boolean removeRedundant) { + this.removeBasedOnStats = removeBasedOnStats; + this.removeRedundant = removeRedundant; + } + + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... 
nodeOutputs) throws SemanticException { + ReduceSinkOperator rs = (ReduceSinkOperator) nd; + RedundantSemijoinAndDppContext rCtx = procCtx instanceof RedundantSemijoinAndDppContext ? (RedundantSemijoinAndDppContext) procCtx : null; /* removeSemiJoinIfNoStats dispatches this processor with the OptimizeTezProcContext, so an unconditional cast to RedundantSemijoinAndDppContext would throw ClassCastException on the stats path; rCtx is only needed, and only non-null, for the redundant-removal pass. */ + ParseContext pCtx = rCtx != null ? rCtx.parseContext : ((OptimizeTezProcContext) procCtx).parseContext; + SemiJoinBranchInfo sjInfo = pCtx.getRsToSemiJoinBranchInfo().get(rs); + if (sjInfo == null) { + // nothing to do here. + return null; + } + TableScanOperator targetTSOp = sjInfo.getTsOp(); + ExprNodeDesc targetColExpr = pCtx.getRsToRuntimeValuesInfoMap().get(rs).getTsColExpr(); + + // This is a semijoin branch. The stack should look like, + // -SEL-GB1-RS1-GB2-RS2 + GroupByOperator gbOp = (GroupByOperator) stack.get(stack.size() - 2); + GroupByDesc gbDesc = gbOp.getConf(); + ArrayList aggregationDescs = gbDesc.getAggregators(); + for (AggregationDesc agg : aggregationDescs) { + if (!isBloomFilterAgg(agg)) { + continue; + } + + GenericUDAFBloomFilterEvaluator udafBloomFilterEvaluator = + (GenericUDAFBloomFilterEvaluator) agg.getGenericUDAFEvaluator(); + if (udafBloomFilterEvaluator.hasHintEntries()) { + return null; // Created using hint, skip it + } + + if (removeBasedOnStats) { + long expectedEntries = udafBloomFilterEvaluator.getExpectedEntries(); + if (expectedEntries == -1 || expectedEntries > + pCtx.getConf().getLongVar(ConfVars.TEZ_MAX_BLOOM_FILTER_ENTRIES)) { + if (sjInfo.getIsHint()) { + throw new SemanticException("Removing hinted semijoin due to lack to stats" + + " or exceeding max bloom filter entries"); + } + // Remove the semijoin optimization branch along with ALL the mappings + // The parent GB2 has all the branches. Collect them and remove them. + for (Node node : gbOp.getChildren()) { + ReduceSinkOperator rsFinal = (ReduceSinkOperator) node; + TableScanOperator ts = pCtx.getRsToSemiJoinBranchInfo(). + get(rsFinal).getTsOp(); + if (LOG.isDebugEnabled()) { + LOG.debug("expectedEntries=" + expectedEntries + ". " + + "Either stats unavailable or expectedEntries exceeded max allowable bloomfilter size. 
" + + "Removing semijoin " + + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(ts)); + } + GenTezUtils.removeBranch(rsFinal); + GenTezUtils.removeSemiJoinOperator(pCtx, rsFinal, ts); + } + return null; + } + } + } + + if (removeBasedOnStats) { + // At this point, hinted semijoin case has been handled already + // Check if big table is big enough that runtime filtering is + // worth it. + TableScanOperator ts = sjInfo.getTsOp(); + if (ts.getStatistics() != null) { + long numRows = ts.getStatistics().getNumRows(); + if (numRows < pCtx.getConf().getLongVar(ConfVars.TEZ_BIGTABLE_MIN_SIZE_SEMIJOIN_REDUCTION)) { + if (sjInfo.getShouldRemove()) { + if (LOG.isDebugEnabled()) { + LOG.debug("Insufficient rows (" + numRows + ") to justify semijoin optimization. Removing semijoin " + + OperatorUtils.getOpNamePretty(rs) + " - " + OperatorUtils.getOpNamePretty(ts)); + } + GenTezUtils.removeBranch(rs); + GenTezUtils.removeSemiJoinOperator(pCtx, rs, ts); + } + } + } + } + + if (removeRedundant) { /* Relies on the walker stack ending in SEL-GB1-RS1-GB2-RS2: index size-5 is the SEL at the head of this branch, and the search for other semijoin RS operators starts from the parent of that SEL. */ + // Look for RS ops above the current semijoin branch + Set rsOps = OperatorUtils.findOperators( + ((Operator) stack.get(stack.size() - 5)).getParentOperators().get(0), + ReduceSinkOperator.class); + for (Operator otherRSOp : rsOps) { + SemiJoinBranchInfo otherSjInfo = pCtx.getRsToSemiJoinBranchInfo().get(otherRSOp); + // First conjunct prevents SJ RS from removing itself + if (otherRSOp != rs && otherSjInfo != null && otherSjInfo.getTsOp() == targetTSOp) { + if (rCtx.opsToRemove.containsKey(otherRSOp)) { + // We found siblings, since we are removing the other operator, no need to remove this one + continue; + } + ExprNodeDesc otherColExpr = pCtx.getRsToRuntimeValuesInfoMap().get(otherRSOp).getTsColExpr(); + if (!otherColExpr.isSame(targetColExpr)) { + // Filter should be on the same column, otherwise we do not proceed + continue; + } + rCtx.opsToRemove.put(rs, targetTSOp); + break; + } + } + } + + return null; + } + } + + private static boolean 
isBloomFilterAgg(AggregationDesc agg) { /* True when the aggregation is the bloom_filter UDAF. */ + return "bloom_filter".equals(agg.getGenericUDAFName()); + } + + private static class DynamicPruningRemovalRedundantProc implements NodeProcessor { /* Schedules for removal a dynamic pruning event operator when another event operator, found below the nearest branching ancestor, targets the same table scan and column; a match already scheduled for removal is ignored, so the duplicates are not all dropped. */ + + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + AppMasterEventOperator event = (AppMasterEventOperator) nd; + if (!(event.getConf() instanceof DynamicPruningEventDesc)) { + return null; + } + + RedundantSemijoinAndDppContext rCtx = (RedundantSemijoinAndDppContext) procCtx; + + DynamicPruningEventDesc desc = (DynamicPruningEventDesc) event.getConf(); + TableScanOperator targetTSOp = desc.getTableScan(); + String targetColumnName = desc.getTargetColumnName(); + + // Look for event ops above the current event op branch + Operator op = event.getParentOperators().get(0); + while (op.getChildOperators().size() < 2) { /* NOTE(review): assumes some ancestor has at least two children; if the walk reached a root first, getParentOperators().get(0) would fail - confirm. */ + op = op.getParentOperators().get(0); + } + Set eventOps = OperatorUtils.findOperators( + op, AppMasterEventOperator.class); + for (AppMasterEventOperator otherEvent : eventOps) { + if (!(otherEvent.getConf() instanceof DynamicPruningEventDesc)) { + continue; + } + DynamicPruningEventDesc otherDesc = (DynamicPruningEventDesc) otherEvent.getConf(); + if (otherEvent != event && otherDesc.getTableScan() == targetTSOp && + otherDesc.getTargetColumnName().equals(targetColumnName)) { + if (rCtx.opsToRemove.containsKey(otherEvent)) { + // We found siblings, since we are removing the other operator, no need to remove this one + continue; + } + rCtx.opsToRemove.put(event, targetTSOp); + break; + } + } + + return null; + } + } + + private void removeRedundantSemijoinAndDpp(OptimizeTezProcContext procCtx) + throws SemanticException { /* Two phases: a walk that only collects redundant semijoin and dynamic pruning branches into the context, then the removal loop, which runs only after the walk has finished. */ + Map opRules = new LinkedHashMap<>(); + opRules.put( + new RuleRegExp("R1", GroupByOperator.getOperatorName() + "%" + + ReduceSinkOperator.getOperatorName() + "%" + + GroupByOperator.getOperatorName() + "%" + + ReduceSinkOperator.getOperatorName() + "%"), + new 
SemiJoinRemovalProc(false, true)); + opRules.put( + new RuleRegExp("R2", + AppMasterEventOperator.getOperatorName() + "%"), + new DynamicPruningRemovalRedundantProc()); + + // Gather + RedundantSemijoinAndDppContext ctx = + new RedundantSemijoinAndDppContext(procCtx.parseContext); + Dispatcher disp = new DefaultRuleDispatcher(null, opRules, ctx); + List topNodes = new ArrayList(); + topNodes.addAll(procCtx.parseContext.getTopOps().values()); + GraphWalker ogw = new PreOrderOnceWalker(disp); + ogw.startWalking(topNodes, null); + + // Remove + for (Map.Entry, TableScanOperator> p : ctx.opsToRemove.entrySet()) { + if (LOG.isDebugEnabled()) { + LOG.debug("Removing redundant " + OperatorUtils.getOpNamePretty(p.getKey()) + " - " + OperatorUtils.getOpNamePretty(p.getValue())); + } + GenTezUtils.removeBranch(p.getKey()); + if (p.getKey() instanceof AppMasterEventOperator) { + GenTezUtils.removeSemiJoinOperator(procCtx.parseContext, (AppMasterEventOperator) p.getKey(), p.getValue()); + } else if (p.getKey() instanceof ReduceSinkOperator) { + GenTezUtils.removeSemiJoinOperator(procCtx.parseContext, (ReduceSinkOperator) p.getKey(), p.getValue()); + } else { + throw new SemanticException("Unexpected error - type for branch could not be recognized"); + } + } + } + + private class RedundantSemijoinAndDppContext implements NodeProcessorCtx { /* Walker context: the parse context plus the map of branch-ending operators scheduled for removal, keyed by operator with the targeted table scan as value. */ + private final ParseContext parseContext; + private final Map, TableScanOperator> opsToRemove; + + private RedundantSemijoinAndDppContext(final ParseContext parseContext) { + this.parseContext = parseContext; + this.opsToRemove = new HashMap<>(); + } + } private boolean findParallelSemiJoinBranch(Operator mapjoin, TableScanOperator bigTableTS, ParseContext parseContext, @@ -1166,9 +1317,8 @@ private boolean findParallelSemiJoinBranch(Operator mapjoin, TableScanOperato */ private void removeSemijoinsParallelToMapJoin(OptimizeTezProcContext procCtx) throws SemanticException { - 
if(!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION) || - !procCtx.conf.getBoolVar(ConfVars.HIVECONVERTJOIN) || - procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION_FOR_MAPJOIN)) { + if(!procCtx.conf.getBoolVar(ConfVars.HIVECONVERTJOIN) || + procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION_FOR_MAPJOIN)) { // Not needed without semi-join reduction or mapjoins or when semijoins // are enabled for parallel mapjoins. return; @@ -1376,11 +1526,6 @@ private static double computeBloomFilterNetBenefit( private void removeSemijoinOptimizationByBenefit(OptimizeTezProcContext procCtx) throws SemanticException { - if(!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION)) { - // Not needed without semi-join reduction - return; - } - List semijoinRsToRemove = new ArrayList(); Map map = procCtx.parseContext.getRsToSemiJoinBranchInfo(); double semijoinReductionThreshold = procCtx.conf.getFloatVar( @@ -1437,11 +1582,6 @@ private void removeSemijoinOptimizationByBenefit(OptimizeTezProcContext procCtx) private void markSemiJoinForDPP(OptimizeTezProcContext procCtx) throws SemanticException { - if(!procCtx.conf.getBoolVar(ConfVars.TEZ_DYNAMIC_SEMIJOIN_REDUCTION)) { - // Not needed without semi-join reduction - return; - } - // Stores the Tablescan operators processed to avoid redoing them. 
Map tsOps = new HashMap<>(); Map map = procCtx.parseContext.getRsToSemiJoinBranchInfo(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java index dec2d1ef38..54c6eb8d3f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java @@ -26,6 +26,12 @@ import java.util.Set; import java.util.Stack; +import org.apache.hadoop.hive.ql.exec.FilterOperator; +import org.apache.hadoop.hive.ql.exec.GroupByOperator; +import org.apache.hadoop.hive.ql.exec.OperatorUtils; /* NOTE(review): no added line visible in this patch references OperatorUtils - confirm the import is needed. */ +import org.apache.hadoop.hive.ql.exec.SelectOperator; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; @@ -119,14 +125,20 @@ public ParseContext transform(ParseContext pctx) throws SemanticException { private static class SyntheticContext implements NodeProcessorCtx { ParseContext parseContext; + boolean extended; public SyntheticContext(ParseContext pCtx) { parseContext = pCtx; + extended = parseContext.getConf().getBoolVar(ConfVars.TEZ_DYNAMIC_PARTITION_PRUNING_EXTENDED); /* Read once in the constructor; JoinSynthetic consults it through isExtended() before deriving extra predicates. */ } public ParseContext getParseContext() { return parseContext; } + + public boolean isExtended() { + return extended; + } } private static class JoinSynthetic implements NodeProcessor { @@ -134,6 +146,8 @@ public ParseContext getParseContext() { public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... 
nodeOutputs) throws SemanticException { + SyntheticContext sCtx = (SyntheticContext) procCtx; + @SuppressWarnings("unchecked") CommonJoinOperator join = (CommonJoinOperator) nd; @@ -161,9 +175,6 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, continue; } - if (LOG.isDebugEnabled()) { - LOG.debug("Synthetic predicate: " + srcPos + " --> " + targetPos); - } ReduceSinkOperator target = (ReduceSinkOperator) parents.get(targetPos); List sourceKeys = source.getConf().getKeyCols(); List targetKeys = target.getConf().getKeyCols(); @@ -175,8 +186,10 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, ExprNodeDesc syntheticExpr = null; for (int i = 0; i < sourceKeys.size(); ++i) { - List inArgs = new ArrayList(); - inArgs.add(sourceKeys.get(i)); + final ExprNodeDesc sourceKey = sourceKeys.get(i); + + List inArgs = new ArrayList<>(); + inArgs.add(sourceKey); ExprNodeDynamicListDesc dynamicExpr = new ExprNodeDynamicListDesc(targetKeys.get(i).getTypeInfo(), target, i); @@ -186,17 +199,36 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, ExprNodeDesc syntheticInExpr = ExprNodeGenericFuncDesc.newInstance(FunctionRegistry.getFunctionInfo("in") .getGenericUDF(), inArgs); + if (LOG.isDebugEnabled()) { + LOG.debug("Synthetic predicate in " + join + ": " + srcPos + " --> " + targetPos + " (" + syntheticInExpr + ")"); + } + List andArgs = new ArrayList<>(); /* Collects the predicate accumulated so far, the IN predicate for this key and, in extended mode, any derived predicates; they are joined with AND only when there are at least two. */ if (syntheticExpr != null) { - List andArgs = new ArrayList(); andArgs.add(syntheticExpr); - andArgs.add(syntheticInExpr); + } + andArgs.add(syntheticInExpr); + + if(sCtx.isExtended()) { + // Backtrack + List newExprs = createDerivatives(target.getParentOperators().get(0), targetKeys.get(i), sourceKey); + if (!newExprs.isEmpty()) { + if (LOG.isDebugEnabled()) { + for (ExprNodeDesc expr : newExprs) { + LOG.debug("Additional synthetic predicate in " + join + ": " + srcPos + " --> " + targetPos + " (" + expr + ")"); + } + } + andArgs.addAll(newExprs); + } + } + if 
(andArgs.size() < 2) { + syntheticExpr = syntheticInExpr; + } else { + // Create AND expression syntheticExpr = ExprNodeGenericFuncDesc.newInstance(FunctionRegistry.getFunctionInfo("and") .getGenericUDF(), andArgs); - } else { - syntheticExpr = syntheticInExpr; } } @@ -241,6 +273,128 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, } return result; } + + private List createDerivatives(final Operator currentOp, + final ExprNodeDesc currentNode, final ExprNodeDesc sourceKey) throws SemanticException { /* Entry point: returns the derived IN predicates for sourceKey, or an empty list when the recursive overload reports failure, in which case anything it had collected so far is discarded. */ + List resultExprs = new ArrayList<>(); + return createDerivatives(resultExprs, currentOp, currentNode, sourceKey) ? resultExprs : new ArrayList<>(); + } + + private boolean createDerivatives(final List resultExprs, final Operator op, + final ExprNodeDesc currentNode, final ExprNodeDesc sourceKey) throws SemanticException { /* Recursive worker. Walks up from op through single-parent Filter, Select, ReduceSink and GroupBy operators to the nearest join, maps currentNode to a join input column, and appends IN predicates on the join parents to resultExprs. Returns true both on success and when there is simply nothing more to derive; returns false only when the join input ReduceSink has no mapping for the column. */ + // 1. Obtain join operator upstream + Operator currentOp = op; + while (!(currentOp instanceof CommonJoinOperator)) { + if (currentOp.getParentOperators() == null || currentOp.getParentOperators().size() != 1) { + // Cannot backtrack + currentOp = null; + break; + } + if (!(currentOp instanceof FilterOperator) && + !(currentOp instanceof SelectOperator) && + !(currentOp instanceof ReduceSinkOperator) && + !(currentOp instanceof GroupByOperator)) { + // Operator not supported + currentOp = null; + break; + } + // Move the pointer + currentOp = currentOp.getParentOperators().get(0); + } + if (currentOp == null) { + // We did not find any join, we are done + return true; + } + CommonJoinOperator joinOp = (CommonJoinOperator) currentOp; + + // 2. Backtrack expression to join output + final ExprNodeDesc joinExprNode = ExprNodeDescUtils.backtrack(currentNode, op, joinOp); + if (joinExprNode == null || !(joinExprNode instanceof ExprNodeColumnDesc)) { + // We are done + return true; + } + final String columnRefJoinInput = ((ExprNodeColumnDesc)joinExprNode).getColumn(); + + // 3. 
Find input position in join for expression obtained + String columnOutputName = null; + for (Map.Entry e : joinOp.getColumnExprMap().entrySet()) { + if (e.getValue() == joinExprNode) { /* NOTE(review): identity comparison; presumably backtrack hands back the very object stored in the join column map - confirm, otherwise this never matches and the method silently returns true. */ + columnOutputName = e.getKey(); + break; + } + } + if (columnOutputName == null) { + // Maybe the join is pruning columns, though it should not. + // In any case, we are done + return true; + } + final int srcPos = joinOp.getConf().getReversedExprs().get(columnOutputName); + final int[][] targets = getTargets(joinOp); + final ReduceSinkOperator rsOp = (ReduceSinkOperator) joinOp.getParentOperators().get(srcPos); + + // 4. Find expression in input RS operator. + final Operator rsOpInput = rsOp.getParentOperators().get(0); + final ExprNodeDesc rsOpInputExprNode = rsOp.getColumnExprMap().get(columnRefJoinInput); + if (rsOpInputExprNode == null) { + // Unexpected, we just bail out and we do not infer additional predicates + return false; + } + int posInRSOpKeys = -1; + for (int i = 0; i < rsOp.getConf().getKeyCols().size(); i++) { + if (rsOpInputExprNode.isSame(rsOp.getConf().getKeyCols().get(i))) { + posInRSOpKeys = i; + break; + } + } + + // 5. If it is part of the key, we can create a new semijoin. 
+ // In addition, we can do the same for siblings + if (posInRSOpKeys >= 0) { + // We pass the tests, we add it to the args for the AND expression + addParentReduceSink(resultExprs, rsOp, posInRSOpKeys, sourceKey); + for (int targetPos: targets[srcPos]) { + if (srcPos == targetPos) { + continue; + } + final ReduceSinkOperator otherRsOp = (ReduceSinkOperator) joinOp.getParentOperators().get(targetPos); + final Operator otherRsOpInput = otherRsOp.getParentOperators().get(0); + // We pass the tests, we add it to the args for the AND expression + addParentReduceSink(resultExprs, otherRsOp, posInRSOpKeys, sourceKey); + // We propagate to operator below + boolean success = createDerivatives( + resultExprs, otherRsOpInput, otherRsOp.getConf().getKeyCols().get(posInRSOpKeys), sourceKey); + if (!success) { + // Something went wrong, bail out + return false; + } + } + } + + // 6. Whether it was part of the key or of the value, if we reach here, we can at least + // continue propagating to operators below + boolean success = createDerivatives( + resultExprs, rsOpInput, rsOpInputExprNode, sourceKey); + if (!success) { + // Something went wrong, bail out + return false; + } + + // 7. 
We are done, success + return true; + } + + private void addParentReduceSink(final List andArgs, final ReduceSinkOperator rsOp, + final int keyIndex, final ExprNodeDesc sourceKey) throws SemanticException { /* Appends to andArgs an IN predicate whose left side is sourceKey and whose right side is a dynamic list built from key keyIndex of rsOp. */ + ExprNodeDynamicListDesc dynamicExpr = + new ExprNodeDynamicListDesc(rsOp.getConf().getKeyCols().get(keyIndex).getTypeInfo(), rsOp, keyIndex); + // Create synthetic IN expression + List inArgs = new ArrayList<>(); + inArgs.add(sourceKey); + inArgs.add(dynamicExpr); + ExprNodeDesc newNode = ExprNodeGenericFuncDesc.newInstance( + FunctionRegistry.getFunctionInfo("in").getGenericUDF(), inArgs); + andArgs.add(newNode); + } } private static class Vectors { @@ -285,4 +439,5 @@ private void traverse(Set targets, int pos) { } } } + } diff --git a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_sw2.q b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_sw2.q new file mode 100644 index 0000000000..910119d447 --- /dev/null +++ b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_sw2.q @@ -0,0 +1,59 @@ +--! qt:dataset:srcpart +--! 
qt:dataset:alltypesorc +set hive.compute.query.using.stats=false; +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.optimize.ppd=true; +set hive.ppd.remove.duplicatefilters=true; +set hive.tez.dynamic.partition.pruning=true; +set hive.tez.dynamic.semijoin.reduction=true; +set hive.optimize.metadataonly=false; +set hive.optimize.index.filter=true; +set hive.stats.autogather=true; +set hive.tez.bigtable.minsize.semijoin.reduction=1; +set hive.tez.min.bloom.filter.entries=1; +set hive.stats.fetch.column.stats=true; +set hive.cbo.enable=false; +set hive.reorder.nway.joins=false; +set hive.merge.nway.joins=false; + +-- Create Tables +create table alltypesorc_int_n0 ( cint int, cstring string ) stored as ORC; +create table srcpart_date_n6 (key string, value string) partitioned by (ds string ) stored as ORC; +CREATE TABLE srcpart_small_n2(key1 STRING, value1 STRING) partitioned by (ds1 string) STORED as ORC; + +-- Add Partitions +alter table srcpart_date_n6 add partition (ds = "2008-04-08"); +alter table srcpart_date_n6 add partition (ds = "2008-04-09"); + +alter table srcpart_small_n2 add partition (ds1 = "2008-04-08"); +alter table srcpart_small_n2 add partition (ds1 = "2008-04-09"); + +-- Load +insert overwrite table alltypesorc_int_n0 select cint, cstring1 from alltypesorc; +insert overwrite table srcpart_date_n6 partition (ds = "2008-04-08" ) select key, value from srcpart where ds = "2008-04-08"; +insert overwrite table srcpart_date_n6 partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09"; +insert overwrite table srcpart_small_n2 partition (ds1 = "2008-04-09") select key, value from srcpart where ds = "2008-04-09" limit 20; + +set hive.tez.dynamic.semijoin.reduction=false; + +analyze table alltypesorc_int_n0 compute statistics for columns; +analyze table srcpart_date_n6 compute statistics for columns; +analyze table srcpart_small_n2 compute statistics for columns; + +set hive.tez.dynamic.semijoin.reduction=true; 
+EXPLAIN +SELECT count(*) + FROM (SELECT * FROM srcpart_date_n6 WHERE ds = "2008-04-09") `srcpart_date_n6` + JOIN (SELECT * FROM srcpart_small_n2 WHERE ds1 = "2008-04-08") `srcpart_small_n2` + ON (srcpart_date_n6.key = srcpart_small_n2.key1) + JOIN ( + SELECT * + FROM (SELECT * FROM alltypesorc_int_n0 WHERE cint = 10) `alltypesorc_int_n0` + JOIN (SELECT * FROM srcpart_small_n2) `srcpart_small_n2` + ON (alltypesorc_int_n0.cstring = srcpart_small_n2.key1)) b + ON (srcpart_small_n2.key1 = b.cstring); + +drop table srcpart_date_n6; +drop table srcpart_small_n2; +drop table alltypesorc_int_n0; diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw2.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw2.q.out new file mode 100644 index 0000000000..883bdd7006 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw2.q.out @@ -0,0 +1,450 @@ +PREHOOK: query: create table alltypesorc_int_n0 ( cint int, cstring string ) stored as ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@alltypesorc_int_n0 +POSTHOOK: query: create table alltypesorc_int_n0 ( cint int, cstring string ) stored as ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@alltypesorc_int_n0 +PREHOOK: query: create table srcpart_date_n6 (key string, value string) partitioned by (ds string ) stored as ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcpart_date_n6 +POSTHOOK: query: create table srcpart_date_n6 (key string, value string) partitioned by (ds string ) stored as ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcpart_date_n6 +PREHOOK: query: CREATE TABLE srcpart_small_n2(key1 STRING, value1 STRING) partitioned by (ds1 string) STORED as ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: 
default@srcpart_small_n2 +POSTHOOK: query: CREATE TABLE srcpart_small_n2(key1 STRING, value1 STRING) partitioned by (ds1 string) STORED as ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcpart_small_n2 +PREHOOK: query: alter table srcpart_date_n6 add partition (ds = "2008-04-08") +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@srcpart_date_n6 +POSTHOOK: query: alter table srcpart_date_n6 add partition (ds = "2008-04-08") +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@srcpart_date_n6 +POSTHOOK: Output: default@srcpart_date_n6@ds=2008-04-08 +PREHOOK: query: alter table srcpart_date_n6 add partition (ds = "2008-04-09") +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@srcpart_date_n6 +POSTHOOK: query: alter table srcpart_date_n6 add partition (ds = "2008-04-09") +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@srcpart_date_n6 +POSTHOOK: Output: default@srcpart_date_n6@ds=2008-04-09 +PREHOOK: query: alter table srcpart_small_n2 add partition (ds1 = "2008-04-08") +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@srcpart_small_n2 +POSTHOOK: query: alter table srcpart_small_n2 add partition (ds1 = "2008-04-08") +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@srcpart_small_n2 +POSTHOOK: Output: default@srcpart_small_n2@ds1=2008-04-08 +PREHOOK: query: alter table srcpart_small_n2 add partition (ds1 = "2008-04-09") +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@srcpart_small_n2 +POSTHOOK: query: alter table srcpart_small_n2 add partition (ds1 = "2008-04-09") +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@srcpart_small_n2 +POSTHOOK: Output: default@srcpart_small_n2@ds1=2008-04-09 +PREHOOK: query: insert overwrite table alltypesorc_int_n0 select cint, cstring1 from alltypesorc +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@alltypesorc_int_n0 +POSTHOOK: query: insert 
overwrite table alltypesorc_int_n0 select cint, cstring1 from alltypesorc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@alltypesorc_int_n0 +POSTHOOK: Lineage: alltypesorc_int_n0.cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +POSTHOOK: Lineage: alltypesorc_int_n0.cstring SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ] +PREHOOK: query: insert overwrite table srcpart_date_n6 partition (ds = "2008-04-08" ) select key, value from srcpart where ds = "2008-04-08" +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_date_n6@ds=2008-04-08 +POSTHOOK: query: insert overwrite table srcpart_date_n6 partition (ds = "2008-04-08" ) select key, value from srcpart where ds = "2008-04-08" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_date_n6@ds=2008-04-08 +POSTHOOK: Lineage: srcpart_date_n6 PARTITION(ds=2008-04-08).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_date_n6 PARTITION(ds=2008-04-08).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table srcpart_date_n6 partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09" +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_date_n6@ds=2008-04-09 +POSTHOOK: query: insert overwrite table srcpart_date_n6 partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09" +POSTHOOK: type: QUERY +POSTHOOK: 
Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_date_n6@ds=2008-04-09 +POSTHOOK: Lineage: srcpart_date_n6 PARTITION(ds=2008-04-09).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_date_n6 PARTITION(ds=2008-04-09).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table srcpart_small_n2 partition (ds1 = "2008-04-09") select key, value from srcpart where ds = "2008-04-09" limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_small_n2@ds1=2008-04-09 +POSTHOOK: query: insert overwrite table srcpart_small_n2 partition (ds1 = "2008-04-09") select key, value from srcpart where ds = "2008-04-09" limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_small_n2@ds1=2008-04-09 +POSTHOOK: Lineage: srcpart_small_n2 PARTITION(ds1=2008-04-09).key1 SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_small_n2 PARTITION(ds1=2008-04-09).value1 SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: analyze table alltypesorc_int_n0 compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@alltypesorc_int_n0 +PREHOOK: Output: default@alltypesorc_int_n0 +#### A masked pattern was here #### +POSTHOOK: query: analyze table alltypesorc_int_n0 compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@alltypesorc_int_n0 +POSTHOOK: Output: default@alltypesorc_int_n0 +#### A masked pattern was 
here #### +PREHOOK: query: analyze table srcpart_date_n6 compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@srcpart_date_n6 +PREHOOK: Input: default@srcpart_date_n6@ds=2008-04-08 +PREHOOK: Input: default@srcpart_date_n6@ds=2008-04-09 +PREHOOK: Output: default@srcpart_date_n6 +PREHOOK: Output: default@srcpart_date_n6@ds=2008-04-08 +PREHOOK: Output: default@srcpart_date_n6@ds=2008-04-09 +#### A masked pattern was here #### +POSTHOOK: query: analyze table srcpart_date_n6 compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@srcpart_date_n6 +POSTHOOK: Input: default@srcpart_date_n6@ds=2008-04-08 +POSTHOOK: Input: default@srcpart_date_n6@ds=2008-04-09 +POSTHOOK: Output: default@srcpart_date_n6 +POSTHOOK: Output: default@srcpart_date_n6@ds=2008-04-08 +POSTHOOK: Output: default@srcpart_date_n6@ds=2008-04-09 +#### A masked pattern was here #### +PREHOOK: query: analyze table srcpart_small_n2 compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@srcpart_small_n2 +PREHOOK: Input: default@srcpart_small_n2@ds1=2008-04-08 +PREHOOK: Input: default@srcpart_small_n2@ds1=2008-04-09 +PREHOOK: Output: default@srcpart_small_n2 +PREHOOK: Output: default@srcpart_small_n2@ds1=2008-04-08 +PREHOOK: Output: default@srcpart_small_n2@ds1=2008-04-09 +#### A masked pattern was here #### +POSTHOOK: query: analyze table srcpart_small_n2 compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@srcpart_small_n2 +POSTHOOK: Input: default@srcpart_small_n2@ds1=2008-04-08 +POSTHOOK: Input: default@srcpart_small_n2@ds1=2008-04-09 +POSTHOOK: Output: default@srcpart_small_n2 +POSTHOOK: Output: default@srcpart_small_n2@ds1=2008-04-08 +POSTHOOK: Output: default@srcpart_small_n2@ds1=2008-04-09 +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN +SELECT count(*) + FROM (SELECT * FROM srcpart_date_n6 WHERE ds = "2008-04-09") `srcpart_date_n6` + JOIN (SELECT * FROM 
srcpart_small_n2 WHERE ds1 = "2008-04-08") `srcpart_small_n2` + ON (srcpart_date_n6.key = srcpart_small_n2.key1) + JOIN ( + SELECT * + FROM (SELECT * FROM alltypesorc_int_n0 WHERE cint = 10) `alltypesorc_int_n0` + JOIN (SELECT * FROM srcpart_small_n2) `srcpart_small_n2` + ON (alltypesorc_int_n0.cstring = srcpart_small_n2.key1)) b + ON (srcpart_small_n2.key1 = b.cstring) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT count(*) + FROM (SELECT * FROM srcpart_date_n6 WHERE ds = "2008-04-09") `srcpart_date_n6` + JOIN (SELECT * FROM srcpart_small_n2 WHERE ds1 = "2008-04-08") `srcpart_small_n2` + ON (srcpart_date_n6.key = srcpart_small_n2.key1) + JOIN ( + SELECT * + FROM (SELECT * FROM alltypesorc_int_n0 WHERE cint = 10) `alltypesorc_int_n0` + JOIN (SELECT * FROM srcpart_small_n2) `srcpart_small_n2` + ON (alltypesorc_int_n0.cstring = srcpart_small_n2.key1)) b + ON (srcpart_small_n2.key1 = b.cstring) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Reducer 6 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) + Map 11 <- Reducer 10 (BROADCAST_EDGE), Reducer 6 (BROADCAST_EDGE) + Reducer 10 <- Map 7 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 8 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) + Reducer 8 <- Map 11 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) + Reducer 9 <- Reducer 8 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart_date_n6 + filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_20_srcpart_small_n2_key1_min) AND DynamicValue(RS_20_srcpart_small_n2_key1_max) and in_bloom_filter(key, DynamicValue(RS_20_srcpart_small_n2_key1_bloom_filter))) and (key BETWEEN 
DynamicValue(RS_25_alltypesorc_int_n0_cstring_min) AND DynamicValue(RS_25_alltypesorc_int_n0_cstring_max) and in_bloom_filter(key, DynamicValue(RS_25_alltypesorc_int_n0_cstring_bloom_filter)))) (type: boolean) + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((key BETWEEN DynamicValue(RS_20_srcpart_small_n2_key1_min) AND DynamicValue(RS_20_srcpart_small_n2_key1_max) and in_bloom_filter(key, DynamicValue(RS_20_srcpart_small_n2_key1_bloom_filter))) and (key BETWEEN DynamicValue(RS_25_alltypesorc_int_n0_cstring_min) AND DynamicValue(RS_25_alltypesorc_int_n0_cstring_max) and in_bloom_filter(key, DynamicValue(RS_25_alltypesorc_int_n0_cstring_bloom_filter))) and key is not null) (type: boolean) + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 11 + Map Operator Tree: + TableScan + alias: srcpart_small_n2 + filterExpr: (key1 is not null and (key1 BETWEEN DynamicValue(RS_12_alltypesorc_int_n0_cstring_min) AND DynamicValue(RS_12_alltypesorc_int_n0_cstring_max) and in_bloom_filter(key1, DynamicValue(RS_12_alltypesorc_int_n0_cstring_bloom_filter))) and (key1 BETWEEN DynamicValue(RS_20_srcpart_small_n2_key1_min) AND DynamicValue(RS_20_srcpart_small_n2_key1_max) and in_bloom_filter(key1, DynamicValue(RS_20_srcpart_small_n2_key1_bloom_filter)))) (type: boolean) + Statistics: Num rows: 20 Data size: 1740 Basic stats: PARTIAL Column stats: PARTIAL + Filter Operator + predicate: ((key1 BETWEEN 
DynamicValue(RS_12_alltypesorc_int_n0_cstring_min) AND DynamicValue(RS_12_alltypesorc_int_n0_cstring_max) and in_bloom_filter(key1, DynamicValue(RS_12_alltypesorc_int_n0_cstring_bloom_filter))) and (key1 BETWEEN DynamicValue(RS_20_srcpart_small_n2_key1_min) AND DynamicValue(RS_20_srcpart_small_n2_key1_max) and in_bloom_filter(key1, DynamicValue(RS_20_srcpart_small_n2_key1_bloom_filter))) and key1 is not null) (type: boolean) + Statistics: Num rows: 20 Data size: 1740 Basic stats: PARTIAL Column stats: PARTIAL + Select Operator + expressions: key1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 1740 Basic stats: PARTIAL Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 20 Data size: 1740 Basic stats: PARTIAL Column stats: PARTIAL + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: srcpart_small_n2 + filterExpr: key1 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 87 Basic stats: PARTIAL Column stats: COMPLETE + Filter Operator + predicate: key1 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 87 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + expressions: key1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 87 Basic stats: PARTIAL Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 87 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 87 Basic stats: PARTIAL Column stats: COMPLETE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1) + mode: hash + outputColumnNames: _col0, _col1, 
_col2 + Statistics: Num rows: 1 Data size: 639 Basic stats: PARTIAL Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 639 Basic stats: PARTIAL Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 7 + Map Operator Tree: + TableScan + alias: alltypesorc_int_n0 + filterExpr: ((cint = 10) and cstring is not null) (type: boolean) + Statistics: Num rows: 12288 Data size: 899146 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((cint = 10) and cstring is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 98 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cstring (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 1 Data size: 98 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 1 Data size: 98 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 10 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 
expectedEntries=1) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col3 + Statistics: Num rows: 1100 Data size: 95700 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Map-reduce partition columns: _col3 (type: string) + Statistics: Num rows: 1100 Data size: 95700 Basic stats: PARTIAL Column stats: NONE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: string) + 1 _col1 (type: string) + Statistics: Num rows: 1210 Data size: 105270 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 4 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 639 Basic stats: PARTIAL Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 639 Basic stats: PARTIAL Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 639 Basic stats: PARTIAL Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Reducer 8 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 22 Data size: 1914 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 22 Data size: 1914 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 22 Data size: 1914 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=22) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 552 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 552 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Reducer 9 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + 
aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=22) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 552 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 552 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: drop table srcpart_date_n6 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@srcpart_date_n6 +PREHOOK: Output: default@srcpart_date_n6 +POSTHOOK: query: drop table srcpart_date_n6 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@srcpart_date_n6 +POSTHOOK: Output: default@srcpart_date_n6 +PREHOOK: query: drop table srcpart_small_n2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@srcpart_small_n2 +PREHOOK: Output: default@srcpart_small_n2 +POSTHOOK: query: drop table srcpart_small_n2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@srcpart_small_n2 +POSTHOOK: Output: default@srcpart_small_n2 +PREHOOK: query: drop table alltypesorc_int_n0 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@alltypesorc_int_n0 +PREHOOK: Output: default@alltypesorc_int_n0 +POSTHOOK: query: drop table alltypesorc_int_n0 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@alltypesorc_int_n0 +POSTHOOK: Output: default@alltypesorc_int_n0 diff --git a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out index f87fe36e11..6a2ae6206c 100644 --- a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out +++ b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out @@ -5347,8 +5347,8 @@ Stage-0 Stage-1 Map 3 llap File Output Operator [FS_21] - Map Join Operator [MAPJOIN_67] (rows=2 width=404) - 
Conds:RS_16._col0=RS_17._col0(Inner),RS_17._col0=MAPJOIN_66._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Map Join Operator [MAPJOIN_71] (rows=2 width=404) + Conds:RS_16._col0=RS_17._col0(Inner),RS_17._col0=MAPJOIN_70._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Map 1 [BROADCAST_EDGE] llap BROADCAST [RS_16] PartitionCols:_col0 @@ -5367,7 +5367,7 @@ Stage-0 predicate:key is not null TableScan [TS_3] (rows=1 width=368) default@t2_n70,b,Tbl:COMPLETE,Col:NONE,Output:["key","val"] - <-Map Join Operator [MAPJOIN_66] (rows=1 width=404) + <-Map Join Operator [MAPJOIN_70] (rows=1 width=404) Conds:SEL_8._col0=RS_13._col0(Inner),Output:["_col0","_col1","_col2","_col3"] <-Map 4 [BROADCAST_EDGE] llap BROADCAST [RS_13] @@ -5408,8 +5408,8 @@ Stage-0 Stage-1 Map 3 llap File Output Operator [FS_21] - Map Join Operator [MAPJOIN_67] (rows=2 width=404) - Conds:RS_16._col0=RS_17._col0(Inner),RS_17._col0=MAPJOIN_66._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Map Join Operator [MAPJOIN_71] (rows=2 width=404) + Conds:RS_16._col0=RS_17._col0(Inner),RS_17._col0=MAPJOIN_70._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Map 1 [BROADCAST_EDGE] llap BROADCAST [RS_16] PartitionCols:_col0 @@ -5428,7 +5428,7 @@ Stage-0 predicate:key is not null TableScan [TS_3] (rows=1 width=368) default@t2_n70,b,Tbl:COMPLETE,Col:NONE,Output:["key","val"] - <-Map Join Operator [MAPJOIN_66] (rows=1 width=404) + <-Map Join Operator [MAPJOIN_70] (rows=1 width=404) Conds:SEL_8._col0=RS_13._col0(Inner),Output:["_col0","_col1","_col2","_col3"] <-Map 4 [BROADCAST_EDGE] llap BROADCAST [RS_13] diff --git a/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out b/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out index 6987a96809..74fc2e89a3 100644 --- 
a/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out @@ -424,7 +424,7 @@ POSTHOOK: type: ANALYZE_TABLE POSTHOOK: Input: default@l3_monthly_dw_dimplan POSTHOOK: Output: default@l3_monthly_dw_dimplan #### A masked pattern was here #### -Warning: Shuffle Join MERGEJOIN[47][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[48][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product PREHOOK: query: EXPLAIN EXTENDED SELECT DW.PROJECT_OBJECT_ID, S1.PLAN_KEY as PLAN_KEY, S2.PROJECT_KEY AS PROJECT_KEY FROM l3_clarity__L3_SNAP_NUMBER_2018022300104 snap inner join @@ -873,7 +873,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[47][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[48][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT DW.PROJECT_OBJECT_ID, S1.PLAN_KEY as PLAN_KEY, S2.PROJECT_KEY AS PROJECT_KEY FROM l3_clarity__L3_SNAP_NUMBER_2018022300104 snap inner join l3_clarity__L3_MONTHLY_DW_FACTPLAN_DW_STG_2018022300104_1 DW on 1=1 @@ -915,7 +915,7 @@ POSTHOOK: Input: default@l3_monthly_dw_dimplan 7147200 NULL 27114 7147200 NULL 27114 7147200 NULL 27114 -Warning: Shuffle Join MERGEJOIN[47][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[48][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product PREHOOK: query: EXPLAIN EXTENDED SELECT DW.PROJECT_OBJECT_ID, S1.PLAN_KEY as PLAN_KEY, S2.PROJECT_KEY AS PROJECT_KEY FROM l3_clarity__L3_SNAP_NUMBER_2018022300104 snap inner join @@ -1365,7 +1365,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[47][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[48][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a 
cross product PREHOOK: query: SELECT DW.PROJECT_OBJECT_ID, S1.PLAN_KEY as PLAN_KEY, S2.PROJECT_KEY AS PROJECT_KEY FROM l3_clarity__L3_SNAP_NUMBER_2018022300104 snap inner join l3_clarity__L3_MONTHLY_DW_FACTPLAN_DW_STG_2018022300104_1 DW on 1=1 diff --git a/ql/src/test/results/clientpositive/perf/tez/query1.q.out b/ql/src/test/results/clientpositive/perf/tez/query1.q.out index 579940c66e..58c422d556 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query1.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query1.q.out @@ -63,10 +63,10 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_159] - Limit [LIM_158] (rows=100 width=860) + File Output Operator [FS_161] + Limit [LIM_160] (rows=100 width=860) Number of rows:100 - Select Operator [SEL_157] (rows=32266667 width=860) + Select Operator [SEL_159] (rows=32266667 width=860) Output:["_col0"] <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_50] @@ -74,96 +74,96 @@ Stage-0 Output:["_col0"] Filter Operator [FIL_48] (rows=32266667 width=860) predicate:(_col2 > _col7) - Merge Join Operator [MERGEJOIN_132] (rows=96800003 width=860) - Conds:RS_45._col1=RS_156._col1(Inner),Output:["_col2","_col6","_col7"] + Merge Join Operator [MERGEJOIN_134] (rows=96800003 width=860) + Conds:RS_45._col1=RS_158._col1(Inner),Output:["_col2","_col6","_col7"] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_45] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_130] (rows=88000001 width=860) - Conds:RS_42._col0=RS_151._col0(Inner),Output:["_col1","_col2","_col6"] + Merge Join Operator [MERGEJOIN_132] (rows=88000001 width=860) + Conds:RS_42._col0=RS_153._col0(Inner),Output:["_col1","_col2","_col6"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_151] + SHUFFLE [RS_153] PartitionCols:_col0 - Select Operator [SEL_150] (rows=80000000 width=860) + Select Operator [SEL_152] (rows=80000000 width=860) Output:["_col0","_col1"] - Filter Operator [FIL_149] (rows=80000000 width=860) + Filter Operator [FIL_151] (rows=80000000 width=860) 
predicate:c_customer_sk is not null TableScan [TS_17] (rows=80000000 width=860) default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_customer_id"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_42] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_129] (rows=34842647 width=77) - Conds:RS_145._col1=RS_148._col0(Inner),Output:["_col0","_col1","_col2"] + Merge Join Operator [MERGEJOIN_131] (rows=34842647 width=77) + Conds:RS_147._col1=RS_150._col0(Inner),Output:["_col0","_col1","_col2"] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_148] + SHUFFLE [RS_150] PartitionCols:_col0 - Select Operator [SEL_147] (rows=852 width=1910) + Select Operator [SEL_149] (rows=852 width=1910) Output:["_col0"] - Filter Operator [FIL_146] (rows=852 width=1910) + Filter Operator [FIL_148] (rows=852 width=1910) predicate:((s_state = 'NM') and s_store_sk is not null) TableScan [TS_14] (rows=1704 width=1910) default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_state"] <-Reducer 3 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_145] + SHUFFLE [RS_147] PartitionCols:_col1 - Select Operator [SEL_144] (rows=31675133 width=77) + Select Operator [SEL_146] (rows=31675133 width=77) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_143] (rows=31675133 width=77) + Group By Operator [GBY_145] (rows=31675133 width=77) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_11] PartitionCols:_col0, _col1 Group By Operator [GBY_10] (rows=63350266 width=77) Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col2, _col1 - Merge Join Operator [MERGEJOIN_128] (rows=63350266 width=77) - Conds:RS_137._col0=RS_141._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_130] (rows=63350266 width=77) + Conds:RS_139._col0=RS_143._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_137] + SHUFFLE [RS_139] PartitionCols:_col0 - Select 
Operator [SEL_135] (rows=57591150 width=77) + Select Operator [SEL_137] (rows=57591150 width=77) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_133] (rows=57591150 width=77) + Filter Operator [FIL_135] (rows=57591150 width=77) predicate:(sr_customer_sk is not null and sr_returned_date_sk is not null and sr_store_sk is not null) TableScan [TS_0] (rows=57591150 width=77) default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_returned_date_sk","sr_customer_sk","sr_store_sk","sr_fee"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_141] + SHUFFLE [RS_143] PartitionCols:_col0 - Select Operator [SEL_140] (rows=36524 width=1119) + Select Operator [SEL_142] (rows=36524 width=1119) Output:["_col0"] - Filter Operator [FIL_139] (rows=36524 width=1119) + Filter Operator [FIL_141] (rows=36524 width=1119) predicate:((d_year = 2000) and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_156] + SHUFFLE [RS_158] PartitionCols:_col1 - Select Operator [SEL_155] (rows=15837566 width=77) + Select Operator [SEL_157] (rows=15837566 width=77) Output:["_col0","_col1"] - Group By Operator [GBY_154] (rows=15837566 width=77) + Group By Operator [GBY_156] (rows=15837566 width=77) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","count(_col2)"],keys:_col1 - Select Operator [SEL_153] (rows=31675133 width=77) + Select Operator [SEL_155] (rows=31675133 width=77) Output:["_col1","_col2"] - Group By Operator [GBY_152] (rows=31675133 width=77) + Group By Operator [GBY_154] (rows=31675133 width=77) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_31] PartitionCols:_col0 Group By Operator [GBY_30] (rows=63350266 width=77) Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col2, _col1 - Merge Join Operator [MERGEJOIN_131] 
(rows=63350266 width=77) - Conds:RS_138._col0=RS_142._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_133] (rows=63350266 width=77) + Conds:RS_140._col0=RS_144._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_138] + SHUFFLE [RS_140] PartitionCols:_col0 - Select Operator [SEL_136] (rows=57591150 width=77) + Select Operator [SEL_138] (rows=57591150 width=77) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_134] (rows=57591150 width=77) + Filter Operator [FIL_136] (rows=57591150 width=77) predicate:(sr_returned_date_sk is not null and sr_store_sk is not null) Please refer to the previous TableScan [TS_0] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_142] + SHUFFLE [RS_144] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_140] + Please refer to the previous Select Operator [SEL_142] diff --git a/ql/src/test/results/clientpositive/perf/tez/query16.q.out b/ql/src/test/results/clientpositive/perf/tez/query16.q.out index 0b64c55b0f..5652f3b019 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query16.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query16.q.out @@ -80,22 +80,22 @@ Stage-0 limit:-1 Stage-1 Reducer 9 vectorized - File Output Operator [FS_174] - Limit [LIM_173] (rows=1 width=344) + File Output Operator [FS_176] + Limit [LIM_175] (rows=1 width=344) Number of rows:100 - Select Operator [SEL_172] (rows=1 width=344) + Select Operator [SEL_174] (rows=1 width=344) Output:["_col0","_col1","_col2"] <-Reducer 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_171] - Select Operator [SEL_170] (rows=1 width=344) + SHUFFLE [RS_173] + Select Operator [SEL_172] (rows=1 width=344) Output:["_col1","_col2","_col3"] - Group By Operator [GBY_169] (rows=1 width=344) + Group By Operator [GBY_171] (rows=1 width=344) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] <-Reducer 7 [CUSTOM_SIMPLE_EDGE] vectorized 
- PARTITION_ONLY_SHUFFLE [RS_168] - Group By Operator [GBY_167] (rows=1 width=344) + PARTITION_ONLY_SHUFFLE [RS_170] + Group By Operator [GBY_169] (rows=1 width=344) Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"] - Group By Operator [GBY_166] (rows=231905279 width=135) + Group By Operator [GBY_168] (rows=231905279 width=135) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_74] @@ -106,21 +106,21 @@ Stage-0 Output:["_col4","_col5","_col6"] Filter Operator [FIL_41] (rows=231905279 width=135) predicate:_col14 is null - Merge Join Operator [MERGEJOIN_128] (rows=463810558 width=135) - Conds:RS_38._col4=RS_165._col0(Left Outer),Output:["_col4","_col5","_col6","_col14"] + Merge Join Operator [MERGEJOIN_130] (rows=463810558 width=135) + Conds:RS_38._col4=RS_167._col0(Left Outer),Output:["_col4","_col5","_col6","_col14"] <-Reducer 18 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_165] + FORWARD [RS_167] PartitionCols:_col0 - Select Operator [SEL_164] (rows=14399440 width=106) + Select Operator [SEL_166] (rows=14399440 width=106) Output:["_col0","_col1"] - Group By Operator [GBY_163] (rows=14399440 width=106) + Group By Operator [GBY_165] (rows=14399440 width=106) Output:["_col0"],keys:KEY._col0 <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_162] + SHUFFLE [RS_164] PartitionCols:_col0 - Group By Operator [GBY_161] (rows=28798881 width=106) + Group By Operator [GBY_163] (rows=28798881 width=106) Output:["_col0"],keys:cr_order_number - Filter Operator [FIL_160] (rows=28798881 width=106) + Filter Operator [FIL_162] (rows=28798881 width=106) predicate:cr_order_number is not null TableScan [TS_25] (rows=28798881 width=106) default@catalog_returns,cr1,Tbl:COMPLETE,Col:NONE,Output:["cr_order_number"] @@ -129,101 +129,101 @@ Stage-0 PartitionCols:_col4 Select Operator [SEL_37] (rows=421645953 width=135) Output:["_col4","_col5","_col6"] - Merge Join 
Operator [MERGEJOIN_127] (rows=421645953 width=135) - Conds:RS_34._col4=RS_159._col0(Left Semi),Output:["_col3","_col4","_col5","_col6","_col14"],residual filter predicates:{(_col3 <> _col14)} + Merge Join Operator [MERGEJOIN_129] (rows=421645953 width=135) + Conds:RS_34._col4=RS_161._col0(Left Semi),Output:["_col3","_col4","_col5","_col6","_col14"],residual filter predicates:{(_col3 <> _col14)} <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_159] + SHUFFLE [RS_161] PartitionCols:_col0 - Group By Operator [GBY_158] (rows=287989836 width=135) + Group By Operator [GBY_160] (rows=287989836 width=135) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_157] (rows=287989836 width=135) + Select Operator [SEL_159] (rows=287989836 width=135) Output:["_col0","_col1"] - Filter Operator [FIL_156] (rows=287989836 width=135) + Filter Operator [FIL_158] (rows=287989836 width=135) predicate:(cs_order_number is not null and cs_warehouse_sk is not null) TableScan [TS_22] (rows=287989836 width=135) default@catalog_sales,cs2,Tbl:COMPLETE,Col:NONE,Output:["cs_warehouse_sk","cs_order_number"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_34] PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_126] (rows=383314495 width=135) - Conds:RS_18._col2=RS_147._col0(Inner),Output:["_col3","_col4","_col5","_col6"] + Merge Join Operator [MERGEJOIN_128] (rows=383314495 width=135) + Conds:RS_18._col2=RS_149._col0(Inner),Output:["_col3","_col4","_col5","_col6"] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_147] + SHUFFLE [RS_149] PartitionCols:_col0 - Select Operator [SEL_146] (rows=30 width=2045) + Select Operator [SEL_148] (rows=30 width=2045) Output:["_col0"] - Filter Operator [FIL_145] (rows=30 width=2045) + Filter Operator [FIL_147] (rows=30 width=2045) predicate:((cc_county) IN ('Ziebach County', 'Levy County', 'Huron County', 'Franklin Parish', 'Daviess County') and cc_call_center_sk is not null) TableScan [TS_9] (rows=60 width=2045) 
default@call_center,call_center,Tbl:COMPLETE,Col:NONE,Output:["cc_call_center_sk","cc_county"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_125] (rows=348467716 width=135) - Conds:RS_15._col1=RS_139._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"] + Merge Join Operator [MERGEJOIN_127] (rows=348467716 width=135) + Conds:RS_15._col1=RS_141._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_139] + SHUFFLE [RS_141] PartitionCols:_col0 - Select Operator [SEL_138] (rows=20000000 width=1014) + Select Operator [SEL_140] (rows=20000000 width=1014) Output:["_col0"] - Filter Operator [FIL_137] (rows=20000000 width=1014) + Filter Operator [FIL_139] (rows=20000000 width=1014) predicate:((ca_state = 'NY') and ca_address_sk is not null) TableScan [TS_6] (rows=40000000 width=1014) default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_15] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_124] (rows=316788826 width=135) - Conds:RS_155._col0=RS_131._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] + Merge Join Operator [MERGEJOIN_126] (rows=316788826 width=135) + Conds:RS_157._col0=RS_133._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_131] + SHUFFLE [RS_133] PartitionCols:_col0 - Select Operator [SEL_130] (rows=8116 width=1119) + Select Operator [SEL_132] (rows=8116 width=1119) Output:["_col0"] - Filter Operator [FIL_129] (rows=8116 width=1119) + Filter Operator [FIL_131] (rows=8116 width=1119) predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-04-01 00:00:00' AND TIMESTAMP'2001-05-31 00:00:00' and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE 
[RS_155] + SHUFFLE [RS_157] PartitionCols:_col0 - Select Operator [SEL_154] (rows=287989836 width=135) + Select Operator [SEL_156] (rows=287989836 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_153] (rows=287989836 width=135) + Filter Operator [FIL_155] (rows=287989836 width=135) predicate:((cs_call_center_sk BETWEEN DynamicValue(RS_19_call_center_cc_call_center_sk_min) AND DynamicValue(RS_19_call_center_cc_call_center_sk_max) and in_bloom_filter(cs_call_center_sk, DynamicValue(RS_19_call_center_cc_call_center_sk_bloom_filter))) and (cs_ship_addr_sk BETWEEN DynamicValue(RS_16_customer_address_ca_address_sk_min) AND DynamicValue(RS_16_customer_address_ca_address_sk_max) and in_bloom_filter(cs_ship_addr_sk, DynamicValue(RS_16_customer_address_ca_address_sk_bloom_filter))) and (cs_ship_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(cs_ship_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) and cs_call_center_sk is not null and cs_order_number is not null and cs_ship_addr_sk is not null and cs_ship_date_sk is not null) TableScan [TS_0] (rows=287989836 width=135) default@catalog_sales,cs1,Tbl:COMPLETE,Col:NONE,Output:["cs_ship_date_sk","cs_ship_addr_sk","cs_call_center_sk","cs_warehouse_sk","cs_order_number","cs_ext_ship_cost","cs_net_profit"] <-Reducer 11 [BROADCAST_EDGE] vectorized - BROADCAST [RS_136] - Group By Operator [GBY_135] (rows=1 width=12) + BROADCAST [RS_138] + Group By Operator [GBY_137] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_134] - Group By Operator [GBY_133] (rows=1 width=12) + SHUFFLE [RS_136] + Group By Operator [GBY_135] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, 
expectedEntries=1000000)"] - Select Operator [SEL_132] (rows=8116 width=1119) + Select Operator [SEL_134] (rows=8116 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_130] + Please refer to the previous Select Operator [SEL_132] <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_144] - Group By Operator [GBY_143] (rows=1 width=12) + BROADCAST [RS_146] + Group By Operator [GBY_145] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=20000000)"] <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_142] - Group By Operator [GBY_141] (rows=1 width=12) + SHUFFLE [RS_144] + Group By Operator [GBY_143] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=20000000)"] - Select Operator [SEL_140] (rows=20000000 width=1014) + Select Operator [SEL_142] (rows=20000000 width=1014) Output:["_col0"] - Please refer to the previous Select Operator [SEL_138] + Please refer to the previous Select Operator [SEL_140] <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_152] - Group By Operator [GBY_151] (rows=1 width=12) + BROADCAST [RS_154] + Group By Operator [GBY_153] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_150] - Group By Operator [GBY_149] (rows=1 width=12) + SHUFFLE [RS_152] + Group By Operator [GBY_151] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_148] (rows=30 width=2045) + Select Operator [SEL_150] (rows=30 width=2045) Output:["_col0"] - Please refer to the previous Select Operator [SEL_146] + Please refer to the previous Select Operator [SEL_148] diff --git 
a/ql/src/test/results/clientpositive/perf/tez/query17.q.out b/ql/src/test/results/clientpositive/perf/tez/query17.q.out index 2e5e254b2d..620d88ac96 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query17.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query17.q.out @@ -89,8 +89,8 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 17 (BROADCAST_EDGE), Reducer 21 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) -Map 18 <- Reducer 12 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE) +Map 1 <- Reducer 14 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE), Reducer 21 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Map 18 <- Reducer 12 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE) Reducer 10 <- Map 18 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) Reducer 12 <- Map 8 (CUSTOM_SIMPLE_EDGE) @@ -112,16 +112,16 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_256] - Limit [LIM_255] (rows=100 width=88) + File Output Operator [FS_269] + Limit [LIM_268] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_254] (rows=421657640 width=88) + Select Operator [SEL_267] (rows=421657640 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_253] - Select Operator [SEL_252] (rows=421657640 width=88) + SHUFFLE [RS_266] + Select Operator [SEL_265] (rows=421657640 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - Group By Operator [GBY_251] (rows=421657640 width=88) + Group By Operator [GBY_264] (rows=421657640 width=88) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","count(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)","sum(VALUE._col7)","count(VALUE._col8)","sum(VALUE._col9)","sum(VALUE._col10)","sum(VALUE._col11)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_50] @@ -130,172 +130,181 @@ Stage-0 Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["count(_col3)","sum(_col3)","sum(_col7)","sum(_col6)","count(_col4)","sum(_col4)","sum(_col9)","sum(_col8)","count(_col5)","sum(_col5)","sum(_col11)","sum(_col10)"],keys:_col0, _col1, _col2 Select Operator [SEL_47] (rows=843315281 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - Merge Join Operator [MERGEJOIN_202] (rows=843315281 width=88) - Conds:RS_44._col3=RS_230._col0(Inner),Output:["_col5","_col9","_col10","_col14","_col21","_col25"] + Merge Join Operator [MERGEJOIN_212] (rows=843315281 width=88) + Conds:RS_44._col3=RS_250._col0(Inner),Output:["_col5","_col9","_col10","_col14","_col21","_col25"] <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_230] + SHUFFLE [RS_250] PartitionCols:_col0 - Select Operator [SEL_229] (rows=1704 width=1910) + Select Operator [SEL_249] (rows=1704 width=1910) Output:["_col0","_col1"] - Filter Operator [FIL_228] (rows=1704 width=1910) + Filter Operator [FIL_248] (rows=1704 width=1910) predicate:s_store_sk is not null TableScan [TS_32] (rows=1704 width=1910) default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_state"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_44] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_201] (rows=766650239 width=88) + Merge Join Operator [MERGEJOIN_211] (rows=766650239 width=88) Conds:RS_41._col1, 
_col2, _col4=RS_42._col7, _col8, _col9(Inner),Output:["_col3","_col5","_col9","_col10","_col14","_col21"] <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_42] PartitionCols:_col7, _col8, _col9 - Merge Join Operator [MERGEJOIN_200] (rows=348467716 width=135) + Merge Join Operator [MERGEJOIN_210] (rows=348467716 width=135) Conds:RS_28._col2, _col1=RS_29._col1, _col2(Inner),Output:["_col3","_col7","_col8","_col9","_col10"] <-Reducer 13 [SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_29] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_199] (rows=63350266 width=77) - Conds:RS_243._col0=RS_213._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_209] (rows=63350266 width=77) + Conds:RS_241._col0=RS_223._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_213] + PARTITION_ONLY_SHUFFLE [RS_223] PartitionCols:_col0 - Select Operator [SEL_208] (rows=36525 width=1119) + Select Operator [SEL_218] (rows=36525 width=1119) Output:["_col0"] - Filter Operator [FIL_205] (rows=36525 width=1119) + Filter Operator [FIL_215] (rows=36525 width=1119) predicate:((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_quarter_name"] <-Map 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_243] + SHUFFLE [RS_241] PartitionCols:_col0 - Select Operator [SEL_242] (rows=57591150 width=77) + Select Operator [SEL_240] (rows=57591150 width=77) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_241] (rows=57591150 width=77) + Filter Operator [FIL_239] (rows=57591150 width=77) predicate:(sr_customer_sk is not null and sr_item_sk is not null and sr_returned_date_sk is not null and sr_ticket_number is not null) TableScan [TS_15] (rows=57591150 width=77) 
default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number","sr_return_quantity"] <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_28] PartitionCols:_col2, _col1 - Merge Join Operator [MERGEJOIN_198] (rows=316788826 width=135) - Conds:RS_250._col0=RS_211._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_208] (rows=316788826 width=135) + Conds:RS_263._col0=RS_221._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_211] + PARTITION_ONLY_SHUFFLE [RS_221] PartitionCols:_col0 - Select Operator [SEL_207] (rows=36525 width=1119) + Select Operator [SEL_217] (rows=36525 width=1119) Output:["_col0"] - Filter Operator [FIL_204] (rows=36525 width=1119) + Filter Operator [FIL_214] (rows=36525 width=1119) predicate:((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) Please refer to the previous TableScan [TS_3] <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_250] + SHUFFLE [RS_263] PartitionCols:_col0 - Select Operator [SEL_249] (rows=287989836 width=135) + Select Operator [SEL_262] (rows=287989836 width=135) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_248] (rows=287989836 width=135) - predicate:((cs_bill_customer_sk BETWEEN DynamicValue(RS_29_store_returns_sr_customer_sk_min) AND DynamicValue(RS_29_store_returns_sr_customer_sk_max) and in_bloom_filter(cs_bill_customer_sk, DynamicValue(RS_29_store_returns_sr_customer_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_29_store_returns_sr_item_sk_min) AND DynamicValue(RS_29_store_returns_sr_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_29_store_returns_sr_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_26_d3_d_date_sk_min) AND DynamicValue(RS_26_d3_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_26_d3_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and 
cs_item_sk is not null and cs_sold_date_sk is not null) + Filter Operator [FIL_261] (rows=287989836 width=135) + predicate:((cs_bill_customer_sk BETWEEN DynamicValue(RS_29_store_returns_sr_customer_sk_min) AND DynamicValue(RS_29_store_returns_sr_customer_sk_max) and in_bloom_filter(cs_bill_customer_sk, DynamicValue(RS_29_store_returns_sr_customer_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_29_store_returns_sr_item_sk_min) AND DynamicValue(RS_29_store_returns_sr_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_29_store_returns_sr_item_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_39_item_i_item_sk_min) AND DynamicValue(RS_39_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_39_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_26_d3_d_date_sk_min) AND DynamicValue(RS_26_d3_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_26_d3_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) TableScan [TS_9] (rows=287989836 width=135) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_quantity"] - <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_240] - Group By Operator [GBY_239] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_217] - Group By Operator [GBY_215] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_212] (rows=36525 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_207] <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_245] - Group By Operator [GBY_244] (rows=1 width=12) + BROADCAST [RS_244] 
+ Group By Operator [GBY_242] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=63350264)"] <-Reducer 13 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_141] - Group By Operator [GBY_140] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_110] + Group By Operator [GBY_109] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=63350264)"] - Select Operator [SEL_139] (rows=63350266 width=77) + Select Operator [SEL_108] (rows=63350266 width=77) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_199] + Please refer to the previous Merge Join Operator [MERGEJOIN_209] <-Reducer 15 [BROADCAST_EDGE] vectorized BROADCAST [RS_247] - Group By Operator [GBY_246] (rows=1 width=12) + Group By Operator [GBY_245] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=63350264)"] <-Reducer 13 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_146] - Group By Operator [GBY_145] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_125] + Group By Operator [GBY_124] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=63350264)"] - Select Operator [SEL_144] (rows=63350266 width=77) + Select Operator [SEL_123] (rows=63350266 width=77) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_209] + <-Reducer 17 [BROADCAST_EDGE] vectorized + BROADCAST [RS_238] + Group By Operator [GBY_236] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_235] + Group By Operator [GBY_234] (rows=1 width=12) + 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_233] (rows=462000 width=1436) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_199] + Select Operator [SEL_231] (rows=462000 width=1436) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_230] (rows=462000 width=1436) + predicate:i_item_sk is not null + TableScan [TS_6] (rows=462000 width=1436) + default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_item_desc"] + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_260] + Group By Operator [GBY_259] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_227] + Group By Operator [GBY_225] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_222] (rows=36525 width=1119) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_217] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_41] PartitionCols:_col1, _col2, _col4 - Merge Join Operator [MERGEJOIN_197] (rows=696954748 width=88) - Conds:RS_38._col1=RS_222._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col9","_col10"] + Merge Join Operator [MERGEJOIN_207] (rows=696954748 width=88) + Conds:RS_38._col1=RS_232._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col9","_col10"] <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_222] + SHUFFLE [RS_232] PartitionCols:_col0 - Select Operator [SEL_221] (rows=462000 width=1436) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_220] (rows=462000 width=1436) - predicate:i_item_sk is not null - TableScan [TS_6] (rows=462000 width=1436) - 
default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_item_desc"] + Please refer to the previous Select Operator [SEL_231] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_38] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_196] (rows=633595212 width=88) - Conds:RS_238._col0=RS_209._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_206] (rows=633595212 width=88) + Conds:RS_258._col0=RS_219._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_209] + PARTITION_ONLY_SHUFFLE [RS_219] PartitionCols:_col0 - Select Operator [SEL_206] (rows=36524 width=1119) + Select Operator [SEL_216] (rows=36524 width=1119) Output:["_col0"] - Filter Operator [FIL_203] (rows=36524 width=1119) + Filter Operator [FIL_213] (rows=36524 width=1119) predicate:((d_quarter_name = '2000Q1') and d_date_sk is not null) Please refer to the previous TableScan [TS_3] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_238] + SHUFFLE [RS_258] PartitionCols:_col0 - Select Operator [SEL_237] (rows=575995635 width=88) + Select Operator [SEL_257] (rows=575995635 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_236] (rows=575995635 width=88) - predicate:((ss_item_sk BETWEEN DynamicValue(RS_39_item_i_item_sk_min) AND DynamicValue(RS_39_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_39_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_36_d1_d_date_sk_min) AND DynamicValue(RS_36_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_36_d1_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_45_store_s_store_sk_min) AND DynamicValue(RS_45_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_45_store_s_store_sk_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and 
ss_ticket_number is not null) + Filter Operator [FIL_256] (rows=575995635 width=88) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_29_store_returns_sr_customer_sk_min) AND DynamicValue(RS_29_store_returns_sr_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_29_store_returns_sr_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_29_store_returns_sr_item_sk_min) AND DynamicValue(RS_29_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_29_store_returns_sr_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_39_item_i_item_sk_min) AND DynamicValue(RS_39_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_39_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_36_d1_d_date_sk_min) AND DynamicValue(RS_36_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_36_d1_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_45_store_s_store_sk_min) AND DynamicValue(RS_45_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_45_store_s_store_sk_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) TableScan [TS_0] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_quantity"] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_243] + Please refer to the previous Group By Operator [GBY_242] + <-Reducer 15 [BROADCAST_EDGE] vectorized + BROADCAST [RS_246] + Please refer to the previous Group By Operator [GBY_245] <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_227] - Group By Operator [GBY_226] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 16 
[CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_225] - Group By Operator [GBY_224] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_223] (rows=462000 width=1436) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_221] + BROADCAST [RS_237] + Please refer to the previous Group By Operator [GBY_236] <-Reducer 21 [BROADCAST_EDGE] vectorized - BROADCAST [RS_235] - Group By Operator [GBY_234] (rows=1 width=12) + BROADCAST [RS_255] + Group By Operator [GBY_254] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_233] - Group By Operator [GBY_232] (rows=1 width=12) + SHUFFLE [RS_253] + Group By Operator [GBY_252] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_231] (rows=1704 width=1910) + Select Operator [SEL_251] (rows=1704 width=1910) Output:["_col0"] - Please refer to the previous Select Operator [SEL_229] + Please refer to the previous Select Operator [SEL_249] <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_219] - Group By Operator [GBY_218] (rows=1 width=12) + BROADCAST [RS_229] + Group By Operator [GBY_228] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_216] - Group By Operator [GBY_214] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_226] + Group By Operator [GBY_224] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_210] (rows=36524 width=1119) + Select Operator [SEL_220] 
(rows=36524 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_206] + Please refer to the previous Select Operator [SEL_216] diff --git a/ql/src/test/results/clientpositive/perf/tez/query18.q.out b/ql/src/test/results/clientpositive/perf/tez/query18.q.out index e8585275b4..1b9b2fba02 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query18.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query18.q.out @@ -86,16 +86,16 @@ Stage-0 limit:100 Stage-1 Reducer 6 vectorized - File Output Operator [FS_187] - Limit [LIM_186] (rows=100 width=135) + File Output Operator [FS_189] + Limit [LIM_188] (rows=100 width=135) Number of rows:100 - Select Operator [SEL_185] (rows=1054114882 width=135) + Select Operator [SEL_187] (rows=1054114882 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_184] - Select Operator [SEL_183] (rows=1054114882 width=135) + SHUFFLE [RS_186] + Select Operator [SEL_185] (rows=1054114882 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] - Group By Operator [GBY_182] (rows=1054114882 width=135) + Group By Operator [GBY_184] (rows=1054114882 width=135) Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)","sum(VALUE._col6)","count(VALUE._col7)","sum(VALUE._col8)","count(VALUE._col9)","sum(VALUE._col10)","count(VALUE._col11)","sum(VALUE._col12)","count(VALUE._col13)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_43] @@ -104,42 +104,42 @@ Stage-0 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18"],aggregations:["sum(_col4)","count(_col4)","sum(_col5)","count(_col5)","sum(_col6)","count(_col6)","sum(_col7)","count(_col7)","sum(_col8)","count(_col8)","sum(_col9)","count(_col9)","sum(_col10)","count(_col10)"],keys:_col0, _col1, _col2, _col3, 0L Select Operator [SEL_40] (rows=421645953 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] - Merge Join Operator [MERGEJOIN_143] (rows=421645953 width=135) + Merge Join Operator [MERGEJOIN_145] (rows=421645953 width=135) Conds:RS_37._col0=RS_38._col3(Inner),Output:["_col4","_col6","_col7","_col8","_col11","_col16","_col17","_col18","_col19","_col20","_col26"] <-Reducer 3 [SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_37] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_139] (rows=48400001 width=860) - Conds:RS_34._col1=RS_152._col0(Inner),Output:["_col0","_col4","_col6","_col7","_col8"] + Merge Join Operator [MERGEJOIN_141] (rows=48400001 width=860) + Conds:RS_34._col1=RS_154._col0(Inner),Output:["_col0","_col4","_col6","_col7","_col8"] <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_152] + SHUFFLE [RS_154] PartitionCols:_col0 - Select Operator [SEL_151] (rows=1861800 width=385) + Select Operator [SEL_153] (rows=1861800 width=385) Output:["_col0"] - Filter Operator [FIL_150] (rows=1861800 width=385) + Filter Operator [FIL_152] (rows=1861800 width=385) predicate:cd_demo_sk is not null TableScan [TS_6] (rows=1861800 width=385) default@customer_demographics,cd2,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_34] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_138] (rows=44000000 width=860) - Conds:RS_146._col2=RS_149._col0(Inner),Output:["_col0","_col1","_col4","_col6","_col7","_col8"] + Merge Join Operator [MERGEJOIN_140] (rows=44000000 width=860) + 
Conds:RS_148._col2=RS_151._col0(Inner),Output:["_col0","_col1","_col4","_col6","_col7","_col8"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_146] + SHUFFLE [RS_148] PartitionCols:_col2 - Select Operator [SEL_145] (rows=40000000 width=860) + Select Operator [SEL_147] (rows=40000000 width=860) Output:["_col0","_col1","_col2","_col4"] - Filter Operator [FIL_144] (rows=40000000 width=860) + Filter Operator [FIL_146] (rows=40000000 width=860) predicate:((c_birth_month) IN (9, 5, 12, 4, 1, 10) and c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_customer_sk is not null) TableScan [TS_0] (rows=80000000 width=860) default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk","c_birth_month","c_birth_year"] <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_149] + SHUFFLE [RS_151] PartitionCols:_col0 - Select Operator [SEL_148] (rows=20000000 width=1014) + Select Operator [SEL_150] (rows=20000000 width=1014) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_147] (rows=20000000 width=1014) + Filter Operator [FIL_149] (rows=20000000 width=1014) predicate:((ca_state) IN ('ND', 'WI', 'AL', 'NC', 'OK', 'MS', 'TN') and ca_address_sk is not null) TableScan [TS_3] (rows=40000000 width=1014) default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_county","ca_state","ca_country"] @@ -148,96 +148,96 @@ Stage-0 PartitionCols:_col3 Select Operator [SEL_30] (rows=383314495 width=135) Output:["_col1","_col3","_col6","_col7","_col8","_col9","_col10","_col16"] - Merge Join Operator [MERGEJOIN_142] (rows=383314495 width=135) - Conds:RS_27._col3=RS_171._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col8","_col14","_col16"] + Merge Join Operator [MERGEJOIN_144] (rows=383314495 width=135) + Conds:RS_27._col3=RS_173._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col8","_col14","_col16"] <-Map 18 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE 
[RS_171] + PARTITION_ONLY_SHUFFLE [RS_173] PartitionCols:_col0 - Select Operator [SEL_170] (rows=462000 width=1436) + Select Operator [SEL_172] (rows=462000 width=1436) Output:["_col0","_col1"] - Filter Operator [FIL_169] (rows=462000 width=1436) + Filter Operator [FIL_171] (rows=462000 width=1436) predicate:i_item_sk is not null TableScan [TS_18] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"] <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_27] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_141] (rows=348467716 width=135) - Conds:RS_24._col2=RS_163._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col14"] + Merge Join Operator [MERGEJOIN_143] (rows=348467716 width=135) + Conds:RS_24._col2=RS_165._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col14"] <-Map 16 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_163] + PARTITION_ONLY_SHUFFLE [RS_165] PartitionCols:_col0 - Select Operator [SEL_162] (rows=465450 width=385) + Select Operator [SEL_164] (rows=465450 width=385) Output:["_col0","_col3"] - Filter Operator [FIL_161] (rows=465450 width=385) + Filter Operator [FIL_163] (rows=465450 width=385) predicate:((cd_education_status = 'College') and (cd_gender = 'M') and cd_demo_sk is not null) TableScan [TS_15] (rows=1861800 width=385) default@customer_demographics,cd1,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_gender","cd_education_status","cd_dep_count"] <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_140] (rows=316788826 width=135) - Conds:RS_181._col0=RS_155._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Merge Join Operator [MERGEJOIN_142] (rows=316788826 width=135) + Conds:RS_183._col0=RS_157._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] <-Map 14 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_155] + 
PARTITION_ONLY_SHUFFLE [RS_157] PartitionCols:_col0 - Select Operator [SEL_154] (rows=36524 width=1119) + Select Operator [SEL_156] (rows=36524 width=1119) Output:["_col0"] - Filter Operator [FIL_153] (rows=36524 width=1119) + Filter Operator [FIL_155] (rows=36524 width=1119) predicate:((d_year = 2001) and d_date_sk is not null) TableScan [TS_12] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_181] + SHUFFLE [RS_183] PartitionCols:_col0 - Select Operator [SEL_180] (rows=287989836 width=135) + Select Operator [SEL_182] (rows=287989836 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Filter Operator [FIL_179] (rows=287989836 width=135) + Filter Operator [FIL_181] (rows=287989836 width=135) predicate:((cs_bill_cdemo_sk BETWEEN DynamicValue(RS_25_cd1_cd_demo_sk_min) AND DynamicValue(RS_25_cd1_cd_demo_sk_max) and in_bloom_filter(cs_bill_cdemo_sk, DynamicValue(RS_25_cd1_cd_demo_sk_bloom_filter))) and (cs_bill_customer_sk BETWEEN DynamicValue(RS_37_customer_c_customer_sk_min) AND DynamicValue(RS_37_customer_c_customer_sk_max) and in_bloom_filter(cs_bill_customer_sk, DynamicValue(RS_37_customer_c_customer_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_28_item_i_item_sk_min) AND DynamicValue(RS_28_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_28_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_22_date_dim_d_date_sk_min) AND DynamicValue(RS_22_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_22_date_dim_d_date_sk_bloom_filter))) and cs_bill_cdemo_sk is not null and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) TableScan [TS_9] (rows=287989836 width=135) 
default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_bill_cdemo_sk","cs_item_sk","cs_quantity","cs_list_price","cs_sales_price","cs_coupon_amt","cs_net_profit"] <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_160] - Group By Operator [GBY_159] (rows=1 width=12) + BROADCAST [RS_162] + Group By Operator [GBY_161] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_158] - Group By Operator [GBY_157] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_160] + Group By Operator [GBY_159] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_156] (rows=36524 width=1119) + Select Operator [SEL_158] (rows=36524 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_154] + Please refer to the previous Select Operator [SEL_156] <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_168] - Group By Operator [GBY_167] (rows=1 width=12) + BROADCAST [RS_170] + Group By Operator [GBY_169] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_166] - Group By Operator [GBY_165] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_168] + Group By Operator [GBY_167] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_164] (rows=465450 width=385) + Select Operator [SEL_166] (rows=465450 width=385) Output:["_col0"] - Please refer to the previous Select Operator [SEL_162] + Please refer to the previous Select Operator [SEL_164] <-Reducer 19 
[BROADCAST_EDGE] vectorized - BROADCAST [RS_176] - Group By Operator [GBY_175] (rows=1 width=12) + BROADCAST [RS_178] + Group By Operator [GBY_177] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 18 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_174] - Group By Operator [GBY_173] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_176] + Group By Operator [GBY_175] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_172] (rows=462000 width=1436) + Select Operator [SEL_174] (rows=462000 width=1436) Output:["_col0"] - Please refer to the previous Select Operator [SEL_170] + Please refer to the previous Select Operator [SEL_172] <-Reducer 7 [BROADCAST_EDGE] vectorized - BROADCAST [RS_178] - Group By Operator [GBY_177] (rows=1 width=12) + BROADCAST [RS_180] + Group By Operator [GBY_179] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=48400000)"] <-Reducer 3 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_120] - Group By Operator [GBY_119] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_126] + Group By Operator [GBY_125] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=48400000)"] - Select Operator [SEL_118] (rows=48400001 width=860) + Select Operator [SEL_124] (rows=48400001 width=860) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_139] + Please refer to the previous Merge Join Operator [MERGEJOIN_141] diff --git a/ql/src/test/results/clientpositive/perf/tez/query2.q.out b/ql/src/test/results/clientpositive/perf/tez/query2.q.out index d24899ccf3..5f908948aa 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query2.q.out +++ 
b/ql/src/test/results/clientpositive/perf/tez/query2.q.out @@ -139,33 +139,33 @@ Stage-0 limit:-1 Stage-1 Reducer 7 vectorized - File Output Operator [FS_191] - Select Operator [SEL_190] (rows=287491028 width=135) + File Output Operator [FS_195] + Select Operator [SEL_194] (rows=287491028 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_58] Select Operator [SEL_57] (rows=287491028 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Merge Join Operator [MERGEJOIN_143] (rows=287491028 width=135) + Merge Join Operator [MERGEJOIN_147] (rows=287491028 width=135) Conds:RS_54._col0=RS_55.(_col0 - 53)(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_55] PartitionCols:(_col0 - 53) - Merge Join Operator [MERGEJOIN_142] (rows=261355475 width=135) - Conds:RS_189._col0=RS_187._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_146] (rows=261355475 width=135) + Conds:RS_193._col0=RS_191._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_187] + SHUFFLE [RS_191] PartitionCols:_col0 - Select Operator [SEL_185] (rows=36524 width=1119) + Select Operator [SEL_189] (rows=36524 width=1119) Output:["_col0"] - Filter Operator [FIL_183] (rows=36524 width=1119) + Filter Operator [FIL_187] (rows=36524 width=1119) predicate:((d_year = 2002) and d_week_seq is not null) TableScan [TS_20] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_week_seq","d_year"] <-Reducer 12 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_189] + FORWARD [RS_193] PartitionCols:_col0 - Group By Operator [GBY_188] (rows=237595882 width=135) + Group By Operator [GBY_192] (rows=237595882 width=135) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)"],keys:KEY._col0 <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_44] @@ -174,67 +174,67 @@ Stage-0 Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col1)","sum(_col2)","sum(_col3)","sum(_col4)","sum(_col5)","sum(_col6)","sum(_col7)"],keys:_col0 Select Operator [SEL_41] (rows=475191764 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Merge Join Operator [MERGEJOIN_141] (rows=475191764 width=135) - Conds:Union 17._col0=RS_168._col0(Inner),Output:["_col1","_col3","_col4"] + Merge Join Operator [MERGEJOIN_145] (rows=475191764 width=135) + Conds:Union 17._col0=RS_172._col0(Inner),Output:["_col1","_col3","_col4"] <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_168] + SHUFFLE [RS_172] PartitionCols:_col0 - Select Operator [SEL_165] (rows=73049 width=1119) + Select Operator [SEL_169] (rows=73049 width=1119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_164] (rows=73049 width=1119) + Filter Operator [FIL_168] (rows=73049 width=1119) predicate:(d_date_sk is not null and d_week_seq is not null) TableScan [TS_8] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_week_seq","d_day_name"] <-Union 17 [SIMPLE_EDGE] <-Map 16 [CONTAINS] vectorized - Reduce Output Operator [RS_200] + Reduce Output Operator [RS_204] PartitionCols:_col0 - Select Operator [SEL_199] (rows=144002668 width=135) + Select Operator [SEL_203] (rows=144002668 width=135) Output:["_col0","_col1"] - Filter Operator [FIL_198] (rows=144002668 width=135) + Filter Operator [FIL_202] (rows=144002668 width=135) predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_39_date_dim_d_date_sk_min) AND DynamicValue(RS_39_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, 
DynamicValue(RS_39_date_dim_d_date_sk_bloom_filter))) and ws_sold_date_sk is not null) - TableScan [TS_154] (rows=144002668 width=135) + TableScan [TS_158] (rows=144002668 width=135) Output:["ws_sold_date_sk","ws_ext_sales_price"] <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_196] - Group By Operator [GBY_195] (rows=1 width=12) + BROADCAST [RS_200] + Group By Operator [GBY_199] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_173] - Group By Operator [GBY_171] (rows=1 width=12) + SHUFFLE [RS_177] + Group By Operator [GBY_175] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_169] (rows=73049 width=1119) + Select Operator [SEL_173] (rows=73049 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_165] + Please refer to the previous Select Operator [SEL_169] <-Map 18 [CONTAINS] vectorized - Reduce Output Operator [RS_203] + Reduce Output Operator [RS_207] PartitionCols:_col0 - Select Operator [SEL_202] (rows=287989836 width=135) + Select Operator [SEL_206] (rows=287989836 width=135) Output:["_col0","_col1"] - Filter Operator [FIL_201] (rows=287989836 width=135) + Filter Operator [FIL_205] (rows=287989836 width=135) predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_39_date_dim_d_date_sk_min) AND DynamicValue(RS_39_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_39_date_dim_d_date_sk_bloom_filter))) and cs_sold_date_sk is not null) - TableScan [TS_159] (rows=287989836 width=135) + TableScan [TS_163] (rows=287989836 width=135) Output:["cs_sold_date_sk","cs_ext_sales_price"] <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_197] - Please refer to the previous Group By Operator [GBY_195] + BROADCAST [RS_201] + Please refer to 
the previous Group By Operator [GBY_199] <-Reducer 5 [ONE_TO_ONE_EDGE] FORWARD [RS_54] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_140] (rows=261355475 width=135) - Conds:RS_181._col0=RS_186._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_144] (rows=261355475 width=135) + Conds:RS_185._col0=RS_190._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_186] + SHUFFLE [RS_190] PartitionCols:_col0 - Select Operator [SEL_184] (rows=36524 width=1119) + Select Operator [SEL_188] (rows=36524 width=1119) Output:["_col0"] - Filter Operator [FIL_182] (rows=36524 width=1119) + Filter Operator [FIL_186] (rows=36524 width=1119) predicate:((d_year = 2001) and d_week_seq is not null) Please refer to the previous TableScan [TS_20] <-Reducer 4 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_181] + FORWARD [RS_185] PartitionCols:_col0 - Group By Operator [GBY_180] (rows=237595882 width=135) + Group By Operator [GBY_184] (rows=237595882 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)"],keys:KEY._col0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_17] @@ -243,43 +243,43 @@ Stage-0 Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col1)","sum(_col2)","sum(_col3)","sum(_col4)","sum(_col5)","sum(_col6)","sum(_col7)"],keys:_col0 Select Operator [SEL_14] (rows=475191764 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Merge Join Operator [MERGEJOIN_139] (rows=475191764 width=135) - Conds:Union 2._col0=RS_166._col0(Inner),Output:["_col1","_col3","_col4"] + Merge Join Operator [MERGEJOIN_143] (rows=475191764 width=135) + Conds:Union 2._col0=RS_170._col0(Inner),Output:["_col1","_col3","_col4"] 
<-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_166] + SHUFFLE [RS_170] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_165] + Please refer to the previous Select Operator [SEL_169] <-Union 2 [SIMPLE_EDGE] <-Map 1 [CONTAINS] vectorized - Reduce Output Operator [RS_179] + Reduce Output Operator [RS_183] PartitionCols:_col0 - Select Operator [SEL_178] (rows=144002668 width=135) + Select Operator [SEL_182] (rows=144002668 width=135) Output:["_col0","_col1"] - Filter Operator [FIL_177] (rows=144002668 width=135) + Filter Operator [FIL_181] (rows=144002668 width=135) predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_12_date_dim_d_date_sk_min) AND DynamicValue(RS_12_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_12_date_dim_d_date_sk_bloom_filter))) and ws_sold_date_sk is not null) - TableScan [TS_144] (rows=144002668 width=135) + TableScan [TS_148] (rows=144002668 width=135) Output:["ws_sold_date_sk","ws_ext_sales_price"] <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_175] - Group By Operator [GBY_174] (rows=1 width=12) + BROADCAST [RS_179] + Group By Operator [GBY_178] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_172] - Group By Operator [GBY_170] (rows=1 width=12) + SHUFFLE [RS_176] + Group By Operator [GBY_174] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_167] (rows=73049 width=1119) + Select Operator [SEL_171] (rows=73049 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_165] + Please refer to the previous Select Operator [SEL_169] <-Map 8 [CONTAINS] vectorized - Reduce Output Operator [RS_194] + Reduce Output Operator [RS_198] PartitionCols:_col0 - Select Operator [SEL_193] (rows=287989836 
width=135) + Select Operator [SEL_197] (rows=287989836 width=135) Output:["_col0","_col1"] - Filter Operator [FIL_192] (rows=287989836 width=135) + Filter Operator [FIL_196] (rows=287989836 width=135) predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_12_date_dim_d_date_sk_min) AND DynamicValue(RS_12_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_12_date_dim_d_date_sk_bloom_filter))) and cs_sold_date_sk is not null) - TableScan [TS_149] (rows=287989836 width=135) + TableScan [TS_153] (rows=287989836 width=135) Output:["cs_sold_date_sk","cs_ext_sales_price"] <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_176] - Please refer to the previous Group By Operator [GBY_174] + BROADCAST [RS_180] + Please refer to the previous Group By Operator [GBY_178] diff --git a/ql/src/test/results/clientpositive/perf/tez/query23.q.out b/ql/src/test/results/clientpositive/perf/tez/query23.q.out index 6725bec301..aab3f9360c 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query23.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query23.q.out @@ -1,5 +1,5 @@ -Warning: Shuffle Join MERGEJOIN[581][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 27' is a cross product -Warning: Shuffle Join MERGEJOIN[583][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 35' is a cross product +Warning: Shuffle Join MERGEJOIN[585][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 28' is a cross product +Warning: Shuffle Join MERGEJOIN[587][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 36' is a cross product PREHOOK: query: explain with frequent_ss_items as (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt @@ -107,11 +107,11 @@ Plan optimized by CBO. 
Vertex dependency in root stage Map 1 <- Reducer 17 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) Map 13 <- Reducer 19 (BROADCAST_EDGE), Reducer 21 (BROADCAST_EDGE) -Map 22 <- Reducer 30 (BROADCAST_EDGE) -Map 42 <- Reducer 12 (BROADCAST_EDGE) -Map 43 <- Reducer 37 (BROADCAST_EDGE) +Map 23 <- Reducer 31 (BROADCAST_EDGE) +Map 43 <- Reducer 12 (BROADCAST_EDGE), Reducer 22 (BROADCAST_EDGE) +Map 44 <- Reducer 38 (BROADCAST_EDGE) Reducer 10 <- Reducer 16 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Reducer 35 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Reducer 36 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 12 <- Map 7 (CUSTOM_SIMPLE_EDGE) Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 18 (SIMPLE_EDGE) Reducer 15 <- Map 20 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE) @@ -120,64 +120,65 @@ Reducer 17 <- Reducer 16 (CUSTOM_SIMPLE_EDGE) Reducer 19 <- Map 18 (CUSTOM_SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) Reducer 21 <- Map 20 (CUSTOM_SIMPLE_EDGE) -Reducer 23 <- Map 22 (SIMPLE_EDGE), Map 29 (SIMPLE_EDGE) -Reducer 24 <- Map 41 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE) -Reducer 25 <- Reducer 24 (SIMPLE_EDGE) -Reducer 26 <- Reducer 25 (CUSTOM_SIMPLE_EDGE) -Reducer 27 <- Reducer 26 (CUSTOM_SIMPLE_EDGE), Reducer 28 (CUSTOM_SIMPLE_EDGE), Reducer 40 (CUSTOM_SIMPLE_EDGE) -Reducer 28 <- Reducer 25 (CUSTOM_SIMPLE_EDGE) +Reducer 22 <- Map 20 (CUSTOM_SIMPLE_EDGE) +Reducer 24 <- Map 23 (SIMPLE_EDGE), Map 30 (SIMPLE_EDGE) +Reducer 25 <- Map 42 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) +Reducer 26 <- Reducer 25 (SIMPLE_EDGE) +Reducer 27 <- Reducer 26 (CUSTOM_SIMPLE_EDGE) +Reducer 28 <- Reducer 27 (CUSTOM_SIMPLE_EDGE), Reducer 29 (CUSTOM_SIMPLE_EDGE), Reducer 41 (CUSTOM_SIMPLE_EDGE) +Reducer 29 <- Reducer 26 (CUSTOM_SIMPLE_EDGE) Reducer 3 <- Reducer 16 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 30 <- Map 29 (CUSTOM_SIMPLE_EDGE) -Reducer 31 <- Map 29 (SIMPLE_EDGE), Map 43 (SIMPLE_EDGE) -Reducer 32 <- Map 41 
(SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE) -Reducer 33 <- Reducer 32 (SIMPLE_EDGE) -Reducer 34 <- Reducer 33 (CUSTOM_SIMPLE_EDGE) -Reducer 35 <- Reducer 34 (CUSTOM_SIMPLE_EDGE), Reducer 36 (CUSTOM_SIMPLE_EDGE), Reducer 40 (CUSTOM_SIMPLE_EDGE) -Reducer 36 <- Reducer 33 (CUSTOM_SIMPLE_EDGE) -Reducer 37 <- Map 29 (CUSTOM_SIMPLE_EDGE) -Reducer 39 <- Map 38 (SIMPLE_EDGE), Map 41 (SIMPLE_EDGE) -Reducer 4 <- Reducer 27 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) -Reducer 40 <- Reducer 39 (SIMPLE_EDGE) +Reducer 31 <- Map 30 (CUSTOM_SIMPLE_EDGE) +Reducer 32 <- Map 30 (SIMPLE_EDGE), Map 44 (SIMPLE_EDGE) +Reducer 33 <- Map 42 (SIMPLE_EDGE), Reducer 32 (SIMPLE_EDGE) +Reducer 34 <- Reducer 33 (SIMPLE_EDGE) +Reducer 35 <- Reducer 34 (CUSTOM_SIMPLE_EDGE) +Reducer 36 <- Reducer 35 (CUSTOM_SIMPLE_EDGE), Reducer 37 (CUSTOM_SIMPLE_EDGE), Reducer 41 (CUSTOM_SIMPLE_EDGE) +Reducer 37 <- Reducer 34 (CUSTOM_SIMPLE_EDGE) +Reducer 38 <- Map 30 (CUSTOM_SIMPLE_EDGE) +Reducer 4 <- Reducer 28 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 40 <- Map 39 (SIMPLE_EDGE), Map 42 (SIMPLE_EDGE) +Reducer 41 <- Reducer 40 (SIMPLE_EDGE) Reducer 6 <- Union 5 (CUSTOM_SIMPLE_EDGE) Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) -Reducer 9 <- Map 42 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 9 <- Map 43 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 Reducer 6 vectorized - File Output Operator [FS_684] - Limit [LIM_683] (rows=1 width=112) + File Output Operator [FS_691] + Limit [LIM_690] (rows=1 width=112) Number of rows:100 - Group By Operator [GBY_682] (rows=1 width=112) + Group By Operator [GBY_689] (rows=1 width=112) Output:["_col0"],aggregations:["sum(VALUE._col0)"] <-Union 5 [CUSTOM_SIMPLE_EDGE] <-Reducer 11 [CONTAINS] - Reduce Output Operator [RS_594] - Group By Operator [GBY_593] (rows=1 width=112) + Reduce Output Operator [RS_598] + Group By Operator [GBY_597] (rows=1 width=112) Output:["_col0"],aggregations:["sum(_col0)"] - Select Operator 
[SEL_591] (rows=191667562 width=135) + Select Operator [SEL_595] (rows=191667562 width=135) Output:["_col0"] - Merge Join Operator [MERGEJOIN_590] (rows=191667562 width=135) + Merge Join Operator [MERGEJOIN_594] (rows=191667562 width=135) Conds:RS_244._col2=RS_245._col0(Inner),Output:["_col3","_col4"] <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_244] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_580] (rows=174243235 width=135) - Conds:RS_241._col1=RS_633._col0(Inner),Output:["_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_584] (rows=174243235 width=135) + Conds:RS_241._col1=RS_640._col0(Inner),Output:["_col2","_col3","_col4"] <-Reducer 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_633] + SHUFFLE [RS_640] PartitionCols:_col0 - Group By Operator [GBY_630] (rows=58079562 width=88) + Group By Operator [GBY_637] (rows=58079562 width=88) Output:["_col0"],keys:_col1 - Select Operator [SEL_629] (rows=116159124 width=88) + Select Operator [SEL_636] (rows=116159124 width=88) Output:["_col1"] - Filter Operator [FIL_628] (rows=116159124 width=88) + Filter Operator [FIL_635] (rows=116159124 width=88) predicate:(_col3 > 4L) - Select Operator [SEL_627] (rows=348477374 width=88) + Select Operator [SEL_634] (rows=348477374 width=88) Output:["_col0","_col3"] - Group By Operator [GBY_626] (rows=348477374 width=88) + Group By Operator [GBY_633] (rows=348477374 width=88) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 15 [SIMPLE_EDGE] SHUFFLE [RS_24] @@ -186,350 +187,361 @@ Stage-0 Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col1, _col0, _col2 Select Operator [SEL_21] (rows=696954748 width=88) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_565] (rows=696954748 width=88) - Conds:RS_18._col1=RS_617._col0(Inner),Output:["_col3","_col5","_col6"] + Merge Join Operator [MERGEJOIN_569] (rows=696954748 width=88) + 
Conds:RS_18._col1=RS_621._col0(Inner),Output:["_col3","_col5","_col6"] <-Map 20 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_617] + PARTITION_ONLY_SHUFFLE [RS_621] PartitionCols:_col0 - Select Operator [SEL_616] (rows=462000 width=1436) + Select Operator [SEL_620] (rows=462000 width=1436) Output:["_col0","_col1"] - Filter Operator [FIL_615] (rows=462000 width=1436) + Filter Operator [FIL_619] (rows=462000 width=1436) predicate:i_item_sk is not null TableScan [TS_12] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_desc"] <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_564] (rows=633595212 width=88) - Conds:RS_625._col0=RS_609._col0(Inner),Output:["_col1","_col3"] + Merge Join Operator [MERGEJOIN_568] (rows=633595212 width=88) + Conds:RS_632._col0=RS_613._col0(Inner),Output:["_col1","_col3"] <-Map 18 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_609] + PARTITION_ONLY_SHUFFLE [RS_613] PartitionCols:_col0 - Select Operator [SEL_608] (rows=36525 width=1119) + Select Operator [SEL_612] (rows=36525 width=1119) Output:["_col0","_col1"] - Filter Operator [FIL_607] (rows=36525 width=1119) + Filter Operator [FIL_611] (rows=36525 width=1119) predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) TableScan [TS_9] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date","d_year"] <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_625] + SHUFFLE [RS_632] PartitionCols:_col0 - Select Operator [SEL_624] (rows=575995635 width=88) + Select Operator [SEL_631] (rows=575995635 width=88) Output:["_col0","_col1"] - Filter Operator [FIL_623] (rows=575995635 width=88) + Filter Operator [FIL_630] (rows=575995635 width=88) predicate:((ss_item_sk BETWEEN DynamicValue(RS_19_item_i_item_sk_min) AND DynamicValue(RS_19_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_19_item_i_item_sk_bloom_filter))) and 
(ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and ss_item_sk is not null and ss_sold_date_sk is not null) TableScan [TS_6] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk"] <-Reducer 19 [BROADCAST_EDGE] vectorized - BROADCAST [RS_614] - Group By Operator [GBY_613] (rows=1 width=12) + BROADCAST [RS_618] + Group By Operator [GBY_617] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 18 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_612] - Group By Operator [GBY_611] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_616] + Group By Operator [GBY_615] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_610] (rows=36525 width=1119) + Select Operator [SEL_614] (rows=36525 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_608] + Please refer to the previous Select Operator [SEL_612] <-Reducer 21 [BROADCAST_EDGE] vectorized - BROADCAST [RS_622] - Group By Operator [GBY_621] (rows=1 width=12) + BROADCAST [RS_629] + Group By Operator [GBY_628] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_620] - Group By Operator [GBY_619] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_626] + Group By Operator [GBY_624] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_618] (rows=462000 width=1436) + Select Operator 
[SEL_622] (rows=462000 width=1436) Output:["_col0"] - Please refer to the previous Select Operator [SEL_616] + Please refer to the previous Select Operator [SEL_620] <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_241] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_571] (rows=158402938 width=135) - Conds:RS_689._col0=RS_599._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_575] (rows=158402938 width=135) + Conds:RS_698._col0=RS_603._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_599] + SHUFFLE [RS_603] PartitionCols:_col0 - Select Operator [SEL_596] (rows=18262 width=1119) + Select Operator [SEL_600] (rows=18262 width=1119) Output:["_col0"] - Filter Operator [FIL_595] (rows=18262 width=1119) + Filter Operator [FIL_599] (rows=18262 width=1119) predicate:((d_moy = 1) and (d_year = 1999) and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] - <-Map 42 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_689] + <-Map 43 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_698] PartitionCols:_col0 - Select Operator [SEL_688] (rows=144002668 width=135) + Select Operator [SEL_697] (rows=144002668 width=135) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_687] (rows=144002668 width=135) - predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_239_date_dim_d_date_sk_min) AND DynamicValue(RS_239_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_239_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_item_sk is not null and ws_sold_date_sk is not null) + Filter Operator [FIL_696] (rows=144002668 width=135) + predicate:((ws_item_sk BETWEEN DynamicValue(RS_143_item_i_item_sk_min) AND DynamicValue(RS_143_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_143_item_i_item_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN 
DynamicValue(RS_239_date_dim_d_date_sk_min) AND DynamicValue(RS_239_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_239_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_item_sk is not null and ws_sold_date_sk is not null) TableScan [TS_124] (rows=144002668 width=135) default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk","ws_quantity","ws_list_price"] <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_686] - Group By Operator [GBY_685] (rows=1 width=12) + BROADCAST [RS_693] + Group By Operator [GBY_692] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_604] - Group By Operator [GBY_602] (rows=1 width=12) + SHUFFLE [RS_608] + Group By Operator [GBY_606] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_600] (rows=18262 width=1119) + Select Operator [SEL_604] (rows=18262 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_596] - <-Reducer 35 [SIMPLE_EDGE] + Please refer to the previous Select Operator [SEL_600] + <-Reducer 22 [BROADCAST_EDGE] vectorized + BROADCAST [RS_695] + Group By Operator [GBY_694] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_627] + Group By Operator [GBY_625] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_623] (rows=462000 width=1436) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_620] + <-Reducer 36 
[SIMPLE_EDGE] SHUFFLE [RS_245] PartitionCols:_col0 Select Operator [SEL_237] (rows=105599202 width=433) Output:["_col0"] Filter Operator [FIL_236] (rows=105599202 width=433) predicate:(_col3 > (0.95 * _col1)) - Merge Join Operator [MERGEJOIN_583] (rows=316797606 width=433) + Merge Join Operator [MERGEJOIN_587] (rows=316797606 width=433) Conds:(Inner),(Inner),Output:["_col1","_col2","_col3"] - <-Reducer 40 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_681] - Group By Operator [GBY_679] (rows=316797606 width=88) + <-Reducer 41 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_688] + Group By Operator [GBY_686] (rows=316797606 width=88) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 39 [SIMPLE_EDGE] + <-Reducer 40 [SIMPLE_EDGE] SHUFFLE [RS_105] PartitionCols:_col0 Group By Operator [GBY_104] (rows=633595212 width=88) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 Select Operator [SEL_102] (rows=633595212 width=88) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_570] (rows=633595212 width=88) - Conds:RS_678._col0=RS_658._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 41 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_658] + Merge Join Operator [MERGEJOIN_574] (rows=633595212 width=88) + Conds:RS_685._col0=RS_665._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 42 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_665] PartitionCols:_col0 - Select Operator [SEL_657] (rows=80000000 width=860) + Select Operator [SEL_664] (rows=80000000 width=860) Output:["_col0"] - Filter Operator [FIL_656] (rows=80000000 width=860) + Filter Operator [FIL_663] (rows=80000000 width=860) predicate:c_customer_sk is not null TableScan [TS_96] (rows=80000000 width=860) default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk"] - <-Map 38 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_678] + <-Map 39 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_685] PartitionCols:_col0 - Select Operator [SEL_677] (rows=575995635 
width=88) + Select Operator [SEL_684] (rows=575995635 width=88) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_676] (rows=575995635 width=88) + Filter Operator [FIL_683] (rows=575995635 width=88) predicate:ss_customer_sk is not null TableScan [TS_93] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_customer_sk","ss_quantity","ss_sales_price"] - <-Reducer 34 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_707] - Select Operator [SEL_706] (rows=1 width=120) - Filter Operator [FIL_705] (rows=1 width=120) + <-Reducer 35 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_716] + Select Operator [SEL_715] (rows=1 width=120) + Filter Operator [FIL_714] (rows=1 width=120) predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_704] (rows=1 width=120) + Group By Operator [GBY_713] (rows=1 width=120) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_703] (rows=1 width=120) - Group By Operator [GBY_702] (rows=1 width=120) + Select Operator [SEL_712] (rows=1 width=120) + Group By Operator [GBY_711] (rows=1 width=120) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 33 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_700] - Group By Operator [GBY_698] (rows=1 width=120) + <-Reducer 34 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_709] + Group By Operator [GBY_707] (rows=1 width=120) Output:["_col0"],aggregations:["count(_col0)"] - Select Operator [SEL_696] (rows=348477374 width=88) + Select Operator [SEL_705] (rows=348477374 width=88) Output:["_col0"] - Group By Operator [GBY_695] (rows=348477374 width=88) + Group By Operator [GBY_704] (rows=348477374 width=88) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 32 [SIMPLE_EDGE] + <-Reducer 33 [SIMPLE_EDGE] SHUFFLE [RS_175] PartitionCols:_col0 Group By Operator [GBY_174] (rows=696954748 width=88) 
Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 Select Operator [SEL_172] (rows=696954748 width=88) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_575] (rows=696954748 width=88) - Conds:RS_169._col1=RS_660._col0(Inner),Output:["_col2","_col3","_col6"] - <-Map 41 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_660] + Merge Join Operator [MERGEJOIN_579] (rows=696954748 width=88) + Conds:RS_169._col1=RS_667._col0(Inner),Output:["_col2","_col3","_col6"] + <-Map 42 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_667] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_657] - <-Reducer 31 [SIMPLE_EDGE] + Please refer to the previous Select Operator [SEL_664] + <-Reducer 32 [SIMPLE_EDGE] SHUFFLE [RS_169] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_574] (rows=633595212 width=88) - Conds:RS_694._col0=RS_645._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 29 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_645] + Merge Join Operator [MERGEJOIN_578] (rows=633595212 width=88) + Conds:RS_703._col0=RS_652._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 30 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_652] PartitionCols:_col0 - Select Operator [SEL_642] (rows=36525 width=1119) + Select Operator [SEL_649] (rows=36525 width=1119) Output:["_col0"] - Filter Operator [FIL_641] (rows=36525 width=1119) + Filter Operator [FIL_648] (rows=36525 width=1119) predicate:((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) TableScan [TS_36] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] - <-Map 43 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_694] + <-Map 44 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_703] PartitionCols:_col0 - Select Operator [SEL_693] (rows=575995635 width=88) + Select Operator [SEL_702] (rows=575995635 width=88) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_692] (rows=575995635 width=88) + Filter Operator [FIL_701] (rows=575995635 width=88) 
predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_167_date_dim_d_date_sk_min) AND DynamicValue(RS_167_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_167_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) TableScan [TS_157] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_quantity","ss_sales_price"] - <-Reducer 37 [BROADCAST_EDGE] vectorized - BROADCAST [RS_691] - Group By Operator [GBY_690] (rows=1 width=12) + <-Reducer 38 [BROADCAST_EDGE] vectorized + BROADCAST [RS_700] + Group By Operator [GBY_699] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 29 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_650] - Group By Operator [GBY_648] (rows=1 width=12) + <-Map 30 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_657] + Group By Operator [GBY_655] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_646] (rows=36525 width=1119) + Select Operator [SEL_653] (rows=36525 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_642] - <-Reducer 36 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_709] - Group By Operator [GBY_708] (rows=1 width=224) + Please refer to the previous Select Operator [SEL_649] + <-Reducer 37 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_718] + Group By Operator [GBY_717] (rows=1 width=224) Output:["_col0"],aggregations:["max(VALUE._col0)"] - <-Reducer 33 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_701] - Group By Operator [GBY_699] (rows=1 width=224) + <-Reducer 34 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_710] + Group By Operator [GBY_708] (rows=1 width=224) 
Output:["_col0"],aggregations:["max(_col1)"] - Select Operator [SEL_697] (rows=348477374 width=88) + Select Operator [SEL_706] (rows=348477374 width=88) Output:["_col1"] - Please refer to the previous Group By Operator [GBY_695] + Please refer to the previous Group By Operator [GBY_704] <-Reducer 4 [CONTAINS] - Reduce Output Operator [RS_589] - Group By Operator [GBY_588] (rows=1 width=112) + Reduce Output Operator [RS_593] + Group By Operator [GBY_592] (rows=1 width=112) Output:["_col0"],aggregations:["sum(_col0)"] - Select Operator [SEL_586] (rows=383314495 width=135) + Select Operator [SEL_590] (rows=383314495 width=135) Output:["_col0"] - Merge Join Operator [MERGEJOIN_585] (rows=383314495 width=135) + Merge Join Operator [MERGEJOIN_589] (rows=383314495 width=135) Conds:RS_120._col1=RS_121._col0(Inner),Output:["_col3","_col4"] - <-Reducer 27 [SIMPLE_EDGE] + <-Reducer 28 [SIMPLE_EDGE] SHUFFLE [RS_121] PartitionCols:_col0 Select Operator [SEL_113] (rows=105599202 width=433) Output:["_col0"] Filter Operator [FIL_112] (rows=105599202 width=433) predicate:(_col3 > (0.95 * _col1)) - Merge Join Operator [MERGEJOIN_581] (rows=316797606 width=433) + Merge Join Operator [MERGEJOIN_585] (rows=316797606 width=433) Conds:(Inner),(Inner),Output:["_col1","_col2","_col3"] - <-Reducer 40 [CUSTOM_SIMPLE_EDGE] vectorized + <-Reducer 41 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_687] + Please refer to the previous Group By Operator [GBY_686] + <-Reducer 27 [CUSTOM_SIMPLE_EDGE] vectorized PARTITION_ONLY_SHUFFLE [RS_680] - Please refer to the previous Group By Operator [GBY_679] - <-Reducer 26 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_673] - Select Operator [SEL_672] (rows=1 width=120) - Filter Operator [FIL_671] (rows=1 width=120) + Select Operator [SEL_679] (rows=1 width=120) + Filter Operator [FIL_678] (rows=1 width=120) predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_670] (rows=1 width=120) + Group By Operator [GBY_677] 
(rows=1 width=120) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_669] (rows=1 width=120) - Group By Operator [GBY_668] (rows=1 width=120) + Select Operator [SEL_676] (rows=1 width=120) + Group By Operator [GBY_675] (rows=1 width=120) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 25 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_666] - Group By Operator [GBY_664] (rows=1 width=120) + <-Reducer 26 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_673] + Group By Operator [GBY_671] (rows=1 width=120) Output:["_col0"],aggregations:["count(_col0)"] - Select Operator [SEL_662] (rows=348477374 width=88) + Select Operator [SEL_669] (rows=348477374 width=88) Output:["_col0"] - Group By Operator [GBY_661] (rows=348477374 width=88) + Group By Operator [GBY_668] (rows=348477374 width=88) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 24 [SIMPLE_EDGE] + <-Reducer 25 [SIMPLE_EDGE] SHUFFLE [RS_51] PartitionCols:_col0 Group By Operator [GBY_50] (rows=696954748 width=88) Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 Select Operator [SEL_48] (rows=696954748 width=88) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_567] (rows=696954748 width=88) - Conds:RS_45._col1=RS_659._col0(Inner),Output:["_col2","_col3","_col6"] - <-Map 41 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_659] + Merge Join Operator [MERGEJOIN_571] (rows=696954748 width=88) + Conds:RS_45._col1=RS_666._col0(Inner),Output:["_col2","_col3","_col6"] + <-Map 42 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_666] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_657] - <-Reducer 23 [SIMPLE_EDGE] + Please refer to the previous Select Operator [SEL_664] + <-Reducer 24 [SIMPLE_EDGE] SHUFFLE [RS_45] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_566] (rows=633595212 width=88) - Conds:RS_655._col0=RS_643._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 29 [SIMPLE_EDGE] vectorized - 
PARTITION_ONLY_SHUFFLE [RS_643] + Merge Join Operator [MERGEJOIN_570] (rows=633595212 width=88) + Conds:RS_662._col0=RS_650._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 30 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_650] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_642] - <-Map 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_655] + Please refer to the previous Select Operator [SEL_649] + <-Map 23 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_662] PartitionCols:_col0 - Select Operator [SEL_654] (rows=575995635 width=88) + Select Operator [SEL_661] (rows=575995635 width=88) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_653] (rows=575995635 width=88) + Filter Operator [FIL_660] (rows=575995635 width=88) predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_43_date_dim_d_date_sk_min) AND DynamicValue(RS_43_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_43_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) TableScan [TS_33] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_quantity","ss_sales_price"] - <-Reducer 30 [BROADCAST_EDGE] vectorized - BROADCAST [RS_652] - Group By Operator [GBY_651] (rows=1 width=12) + <-Reducer 31 [BROADCAST_EDGE] vectorized + BROADCAST [RS_659] + Group By Operator [GBY_658] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 29 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_649] - Group By Operator [GBY_647] (rows=1 width=12) + <-Map 30 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_656] + Group By Operator [GBY_654] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_644] (rows=36525 
width=1119) + Select Operator [SEL_651] (rows=36525 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_642] - <-Reducer 28 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_675] - Group By Operator [GBY_674] (rows=1 width=224) + Please refer to the previous Select Operator [SEL_649] + <-Reducer 29 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_682] + Group By Operator [GBY_681] (rows=1 width=224) Output:["_col0"],aggregations:["max(VALUE._col0)"] - <-Reducer 25 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_667] - Group By Operator [GBY_665] (rows=1 width=224) + <-Reducer 26 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_674] + Group By Operator [GBY_672] (rows=1 width=224) Output:["_col0"],aggregations:["max(_col1)"] - Select Operator [SEL_663] (rows=348477374 width=88) + Select Operator [SEL_670] (rows=348477374 width=88) Output:["_col1"] - Please refer to the previous Group By Operator [GBY_661] + Please refer to the previous Group By Operator [GBY_668] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_120] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_579] (rows=348467716 width=135) - Conds:RS_117._col2=RS_631._col0(Inner),Output:["_col1","_col3","_col4"] + Merge Join Operator [MERGEJOIN_583] (rows=348467716 width=135) + Conds:RS_117._col2=RS_638._col0(Inner),Output:["_col1","_col3","_col4"] <-Reducer 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_631] + SHUFFLE [RS_638] PartitionCols:_col0 - Please refer to the previous Group By Operator [GBY_630] + Please refer to the previous Group By Operator [GBY_637] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_117] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_563] (rows=316788826 width=135) - Conds:RS_640._col0=RS_597._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_567] (rows=316788826 width=135) + Conds:RS_647._col0=RS_601._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 7 [SIMPLE_EDGE] vectorized 
- SHUFFLE [RS_597] + SHUFFLE [RS_601] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_596] + Please refer to the previous Select Operator [SEL_600] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_640] + SHUFFLE [RS_647] PartitionCols:_col0 - Select Operator [SEL_639] (rows=287989836 width=135) + Select Operator [SEL_646] (rows=287989836 width=135) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_638] (rows=287989836 width=135) + Filter Operator [FIL_645] (rows=287989836 width=135) predicate:((cs_item_sk BETWEEN DynamicValue(RS_118_item_i_item_sk_min) AND DynamicValue(RS_118_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_118_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_115_date_dim_d_date_sk_min) AND DynamicValue(RS_115_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_115_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) TableScan [TS_0] (rows=287989836 width=135) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_quantity","cs_list_price"] <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_637] - Group By Operator [GBY_636] (rows=1 width=20) + BROADCAST [RS_644] + Group By Operator [GBY_643] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=58079560)"] <-Reducer 16 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_635] - Group By Operator [GBY_634] (rows=1 width=20) + SHUFFLE [RS_642] + Group By Operator [GBY_641] (rows=1 width=20) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=58079560)"] - Select Operator [SEL_632] (rows=58079562 width=88) + Select Operator [SEL_639] (rows=58079562 width=88) Output:["_col0"] - Please refer to the 
previous Group By Operator [GBY_630] + Please refer to the previous Group By Operator [GBY_637] <-Reducer 8 [BROADCAST_EDGE] vectorized - BROADCAST [RS_606] - Group By Operator [GBY_605] (rows=1 width=12) + BROADCAST [RS_610] + Group By Operator [GBY_609] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_603] - Group By Operator [GBY_601] (rows=1 width=12) + SHUFFLE [RS_607] + Group By Operator [GBY_605] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_598] (rows=18262 width=1119) + Select Operator [SEL_602] (rows=18262 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_596] + Please refer to the previous Select Operator [SEL_600] diff --git a/ql/src/test/results/clientpositive/perf/tez/query24.q.out b/ql/src/test/results/clientpositive/perf/tez/query24.q.out index 9fcec42c3a..349d429ab3 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query24.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query24.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[286][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 8' is a cross product +Warning: Shuffle Join MERGEJOIN[290][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 8' is a cross product PREHOOK: query: explain with ssales as (select c_last_name @@ -137,281 +137,281 @@ Stage-0 Output:["_col0","_col1","_col2","_col3"] Filter Operator [FIL_89] (rows=77303902 width=321) predicate:(_col3 > _col4) - Merge Join Operator [MERGEJOIN_286] (rows=231911707 width=321) + Merge Join Operator [MERGEJOIN_290] (rows=231911707 width=321) Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 15 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_376] - Select Operator [SEL_375] (rows=1 width=232) + 
PARTITION_ONLY_SHUFFLE [RS_380] + Select Operator [SEL_379] (rows=1 width=232) Output:["_col0"] - Group By Operator [GBY_374] (rows=1 width=232) + Group By Operator [GBY_378] (rows=1 width=232) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] <-Reducer 14 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_373] - Group By Operator [GBY_372] (rows=1 width=232) + PARTITION_ONLY_SHUFFLE [RS_377] + Group By Operator [GBY_376] (rows=1 width=232) Output:["_col0","_col1"],aggregations:["sum(_col10)","count(_col10)"] - Select Operator [SEL_371] (rows=463823414 width=88) + Select Operator [SEL_375] (rows=463823414 width=88) Output:["_col10"] - Group By Operator [GBY_370] (rows=463823414 width=88) + Group By Operator [GBY_374] (rows=463823414 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8, KEY._col9 <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_78] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Group By Operator [GBY_77] (rows=927646829 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(_col4)"],keys:_col11, _col12, _col6, _col8, _col15, _col16, _col17, _col18, _col19, _col22 - Merge Join Operator [MERGEJOIN_285] (rows=927646829 width=88) - Conds:RS_73._col9, _col13=RS_351._col1, upper(_col2)(Inner),Output:["_col4","_col6","_col8","_col11","_col12","_col15","_col16","_col17","_col18","_col19","_col22"] + Merge Join Operator [MERGEJOIN_289] (rows=927646829 width=88) + Conds:RS_73._col9, _col13=RS_355._col1, upper(_col2)(Inner),Output:["_col4","_col6","_col8","_col11","_col12","_col15","_col16","_col17","_col18","_col19","_col22"] <-Map 30 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_351] + SHUFFLE [RS_355] PartitionCols:_col1, upper(_col2) - 
Select Operator [SEL_349] (rows=40000000 width=1014) + Select Operator [SEL_353] (rows=40000000 width=1014) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_348] (rows=40000000 width=1014) + Filter Operator [FIL_352] (rows=40000000 width=1014) predicate:(ca_zip is not null and upper(ca_country) is not null) TableScan [TS_15] (rows=40000000 width=1014) default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_state","ca_zip","ca_country"] <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_73] PartitionCols:_col9, _col13 - Merge Join Operator [MERGEJOIN_284] (rows=843315281 width=88) - Conds:RS_70._col0, _col3=RS_330._col0, _col1(Inner),Output:["_col4","_col6","_col8","_col9","_col11","_col12","_col13","_col15","_col16","_col17","_col18","_col19"] + Merge Join Operator [MERGEJOIN_288] (rows=843315281 width=88) + Conds:RS_70._col0, _col3=RS_334._col0, _col1(Inner),Output:["_col4","_col6","_col8","_col9","_col11","_col12","_col13","_col15","_col16","_col17","_col18","_col19"] <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_330] + SHUFFLE [RS_334] PartitionCols:_col0, _col1 - Select Operator [SEL_326] (rows=57591150 width=77) + Select Operator [SEL_330] (rows=57591150 width=77) Output:["_col0","_col1"] - Filter Operator [FIL_325] (rows=57591150 width=77) + Filter Operator [FIL_329] (rows=57591150 width=77) predicate:(sr_item_sk is not null and sr_ticket_number is not null) TableScan [TS_12] (rows=57591150 width=77) default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_item_sk","sr_ticket_number"] <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_70] PartitionCols:_col0, _col3 - Merge Join Operator [MERGEJOIN_283] (rows=766650239 width=88) - Conds:RS_67._col0=RS_293._col0(Inner),Output:["_col0","_col3","_col4","_col6","_col8","_col9","_col11","_col12","_col13","_col15","_col16","_col17","_col18","_col19"] + Merge Join Operator [MERGEJOIN_287] (rows=766650239 width=88) + 
Conds:RS_67._col0=RS_297._col0(Inner),Output:["_col0","_col3","_col4","_col6","_col8","_col9","_col11","_col12","_col13","_col15","_col16","_col17","_col18","_col19"] <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_293] + SHUFFLE [RS_297] PartitionCols:_col0 - Select Operator [SEL_290] (rows=462000 width=1436) + Select Operator [SEL_294] (rows=462000 width=1436) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_288] (rows=462000 width=1436) + Filter Operator [FIL_292] (rows=462000 width=1436) predicate:i_item_sk is not null TableScan [TS_3] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_current_price","i_size","i_color","i_units","i_manager_id"] <-Reducer 20 [SIMPLE_EDGE] SHUFFLE [RS_67] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_282] (rows=696954748 width=88) - Conds:RS_64._col1=RS_317._col0(Inner),Output:["_col0","_col3","_col4","_col6","_col8","_col9","_col11","_col12","_col13"] + Merge Join Operator [MERGEJOIN_286] (rows=696954748 width=88) + Conds:RS_64._col1=RS_321._col0(Inner),Output:["_col0","_col3","_col4","_col6","_col8","_col9","_col11","_col12","_col13"] <-Map 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_317] + SHUFFLE [RS_321] PartitionCols:_col0 - Select Operator [SEL_314] (rows=80000000 width=860) + Select Operator [SEL_318] (rows=80000000 width=860) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_313] (rows=80000000 width=860) + Filter Operator [FIL_317] (rows=80000000 width=860) predicate:(c_birth_country is not null and c_customer_sk is not null) TableScan [TS_9] (rows=80000000 width=860) default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_first_name","c_last_name","c_birth_country"] <-Reducer 19 [SIMPLE_EDGE] SHUFFLE [RS_64] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_281] (rows=633595212 width=88) - Conds:RS_369._col2=RS_305._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col6","_col8","_col9"] + Merge Join Operator 
[MERGEJOIN_285] (rows=633595212 width=88) + Conds:RS_373._col2=RS_309._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col6","_col8","_col9"] <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_305] + SHUFFLE [RS_309] PartitionCols:_col0 - Select Operator [SEL_302] (rows=852 width=1910) + Select Operator [SEL_306] (rows=852 width=1910) Output:["_col0","_col1","_col3","_col4"] - Filter Operator [FIL_301] (rows=852 width=1910) + Filter Operator [FIL_305] (rows=852 width=1910) predicate:((s_market_id = 7) and s_store_sk is not null and s_zip is not null) TableScan [TS_6] (rows=1704 width=1910) default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name","s_market_id","s_state","s_zip"] <-Map 31 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_369] + SHUFFLE [RS_373] PartitionCols:_col2 - Select Operator [SEL_368] (rows=575995635 width=88) + Select Operator [SEL_372] (rows=575995635 width=88) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_367] (rows=575995635 width=88) + Filter Operator [FIL_371] (rows=575995635 width=88) predicate:((ss_customer_sk BETWEEN DynamicValue(RS_65_customer_c_customer_sk_min) AND DynamicValue(RS_65_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_65_customer_c_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_68_item_i_item_sk_min) AND DynamicValue(RS_68_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_68_item_i_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_71_store_returns_sr_item_sk_min) AND DynamicValue(RS_71_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_71_store_returns_sr_item_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_62_store_s_store_sk_min) AND DynamicValue(RS_62_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_62_store_s_store_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_71_store_returns_sr_ticket_number_min) AND 
DynamicValue(RS_71_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_71_store_returns_sr_ticket_number_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) TableScan [TS_43] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_sales_price"] <-Reducer 16 [BROADCAST_EDGE] vectorized - BROADCAST [RS_362] - Group By Operator [GBY_361] (rows=1 width=12) + BROADCAST [RS_366] + Group By Operator [GBY_365] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_298] - Group By Operator [GBY_296] (rows=1 width=12) + SHUFFLE [RS_302] + Group By Operator [GBY_300] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_294] (rows=462000 width=1436) + Select Operator [SEL_298] (rows=462000 width=1436) Output:["_col0"] - Please refer to the previous Select Operator [SEL_290] + Please refer to the previous Select Operator [SEL_294] <-Reducer 21 [BROADCAST_EDGE] vectorized - BROADCAST [RS_358] - Group By Operator [GBY_357] (rows=1 width=12) + BROADCAST [RS_362] + Group By Operator [GBY_361] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_310] - Group By Operator [GBY_308] (rows=1 width=12) + SHUFFLE [RS_314] + Group By Operator [GBY_312] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_306] (rows=852 width=1910) + Select Operator 
[SEL_310] (rows=852 width=1910) Output:["_col0"] - Please refer to the previous Select Operator [SEL_302] + Please refer to the previous Select Operator [SEL_306] <-Reducer 24 [BROADCAST_EDGE] vectorized - BROADCAST [RS_360] - Group By Operator [GBY_359] (rows=1 width=12) + BROADCAST [RS_364] + Group By Operator [GBY_363] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"] <-Map 22 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_322] - Group By Operator [GBY_320] (rows=1 width=12) + SHUFFLE [RS_326] + Group By Operator [GBY_324] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=80000000)"] - Select Operator [SEL_318] (rows=80000000 width=860) + Select Operator [SEL_322] (rows=80000000 width=860) Output:["_col0"] - Please refer to the previous Select Operator [SEL_314] + Please refer to the previous Select Operator [SEL_318] <-Reducer 28 [BROADCAST_EDGE] vectorized - BROADCAST [RS_364] - Group By Operator [GBY_363] (rows=1 width=12) + BROADCAST [RS_368] + Group By Operator [GBY_367] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] <-Map 25 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_339] - Group By Operator [GBY_335] (rows=1 width=12) + SHUFFLE [RS_343] + Group By Operator [GBY_339] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] - Select Operator [SEL_331] (rows=57591150 width=77) + Select Operator [SEL_335] (rows=57591150 width=77) Output:["_col0"] - Please refer to the previous Select Operator [SEL_326] + Please refer to the previous Select Operator [SEL_330] <-Reducer 29 [BROADCAST_EDGE] vectorized - BROADCAST [RS_366] - Group By Operator [GBY_365] (rows=1 width=12) + BROADCAST 
[RS_370] + Group By Operator [GBY_369] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] <-Map 25 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_340] - Group By Operator [GBY_336] (rows=1 width=12) + SHUFFLE [RS_344] + Group By Operator [GBY_340] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] - Select Operator [SEL_332] (rows=57591150 width=77) + Select Operator [SEL_336] (rows=57591150 width=77) Output:["_col0"] - Please refer to the previous Select Operator [SEL_326] + Please refer to the previous Select Operator [SEL_330] <-Reducer 7 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_356] - Select Operator [SEL_355] (rows=231911707 width=88) + PARTITION_ONLY_SHUFFLE [RS_360] + Select Operator [SEL_359] (rows=231911707 width=88) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_354] (rows=231911707 width=88) + Group By Operator [GBY_358] (rows=231911707 width=88) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col9)"],keys:_col1, _col2, _col7 - Select Operator [SEL_353] (rows=463823414 width=88) + Select Operator [SEL_357] (rows=463823414 width=88) Output:["_col1","_col2","_col7","_col9"] - Group By Operator [GBY_352] (rows=463823414 width=88) + Group By Operator [GBY_356] (rows=463823414 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8 <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_35] PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_34] (rows=927646829 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(_col4)"],keys:_col17, _col18, _col12, _col22, _col6, _col7, _col9, _col10, 
_col14 - Merge Join Operator [MERGEJOIN_280] (rows=927646829 width=88) - Conds:RS_30._col15, _col19=RS_350._col1, upper(_col2)(Inner),Output:["_col4","_col6","_col7","_col9","_col10","_col12","_col14","_col17","_col18","_col22"] + Merge Join Operator [MERGEJOIN_284] (rows=927646829 width=88) + Conds:RS_30._col15, _col19=RS_354._col1, upper(_col2)(Inner),Output:["_col4","_col6","_col7","_col9","_col10","_col12","_col14","_col17","_col18","_col22"] <-Map 30 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_350] + SHUFFLE [RS_354] PartitionCols:_col1, upper(_col2) - Please refer to the previous Select Operator [SEL_349] + Please refer to the previous Select Operator [SEL_353] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_30] PartitionCols:_col15, _col19 - Merge Join Operator [MERGEJOIN_279] (rows=843315281 width=88) - Conds:RS_27._col0, _col3=RS_327._col0, _col1(Inner),Output:["_col4","_col6","_col7","_col9","_col10","_col12","_col14","_col15","_col17","_col18","_col19"] + Merge Join Operator [MERGEJOIN_283] (rows=843315281 width=88) + Conds:RS_27._col0, _col3=RS_331._col0, _col1(Inner),Output:["_col4","_col6","_col7","_col9","_col10","_col12","_col14","_col15","_col17","_col18","_col19"] <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_327] + SHUFFLE [RS_331] PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_326] + Please refer to the previous Select Operator [SEL_330] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_27] PartitionCols:_col0, _col3 - Merge Join Operator [MERGEJOIN_278] (rows=766650239 width=88) - Conds:RS_24._col1=RS_315._col0(Inner),Output:["_col0","_col3","_col4","_col6","_col7","_col9","_col10","_col12","_col14","_col15","_col17","_col18","_col19"] + Merge Join Operator [MERGEJOIN_282] (rows=766650239 width=88) + Conds:RS_24._col1=RS_319._col0(Inner),Output:["_col0","_col3","_col4","_col6","_col7","_col9","_col10","_col12","_col14","_col15","_col17","_col18","_col19"] <-Map 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_315] + SHUFFLE [RS_319] 
PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_314] + Please refer to the previous Select Operator [SEL_318] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_277] (rows=696954748 width=88) - Conds:RS_21._col2=RS_303._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col6","_col7","_col9","_col10","_col12","_col14","_col15"] + Merge Join Operator [MERGEJOIN_281] (rows=696954748 width=88) + Conds:RS_21._col2=RS_307._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col6","_col7","_col9","_col10","_col12","_col14","_col15"] <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_303] + SHUFFLE [RS_307] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_302] + Please refer to the previous Select Operator [SEL_306] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_276] (rows=633595212 width=88) - Conds:RS_347._col0=RS_291._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col6","_col7","_col9","_col10"] + Merge Join Operator [MERGEJOIN_280] (rows=633595212 width=88) + Conds:RS_351._col0=RS_295._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col6","_col7","_col9","_col10"] <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_291] + SHUFFLE [RS_295] PartitionCols:_col0 - Select Operator [SEL_289] (rows=231000 width=1436) + Select Operator [SEL_293] (rows=231000 width=1436) Output:["_col0","_col1","_col2","_col4","_col5"] - Filter Operator [FIL_287] (rows=231000 width=1436) + Filter Operator [FIL_291] (rows=231000 width=1436) predicate:((i_color = 'orchid') and i_item_sk is not null) Please refer to the previous TableScan [TS_3] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_347] + SHUFFLE [RS_351] PartitionCols:_col0 - Select Operator [SEL_346] (rows=575995635 width=88) + Select Operator [SEL_350] (rows=575995635 width=88) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_345] (rows=575995635 
width=88) + Filter Operator [FIL_349] (rows=575995635 width=88) predicate:((ss_customer_sk BETWEEN DynamicValue(RS_25_customer_c_customer_sk_min) AND DynamicValue(RS_25_customer_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_25_customer_c_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_19_item_i_item_sk_min) AND DynamicValue(RS_19_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_19_item_i_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_28_store_returns_sr_item_sk_min) AND DynamicValue(RS_28_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_28_store_returns_sr_item_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_22_store_s_store_sk_min) AND DynamicValue(RS_22_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_22_store_s_store_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_28_store_returns_sr_ticket_number_min) AND DynamicValue(RS_28_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_28_store_returns_sr_ticket_number_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) TableScan [TS_0] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_sales_price"] <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_300] - Group By Operator [GBY_299] (rows=1 width=12) + BROADCAST [RS_304] + Group By Operator [GBY_303] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_297] - Group By Operator [GBY_295] (rows=1 width=12) + SHUFFLE [RS_301] + Group By Operator [GBY_299] (rows=1 width=12) 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_292] (rows=231000 width=1436) + Select Operator [SEL_296] (rows=231000 width=1436) Output:["_col0"] - Please refer to the previous Select Operator [SEL_289] + Please refer to the previous Select Operator [SEL_293] <-Reducer 18 [BROADCAST_EDGE] vectorized - BROADCAST [RS_312] - Group By Operator [GBY_311] (rows=1 width=12) + BROADCAST [RS_316] + Group By Operator [GBY_315] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_309] - Group By Operator [GBY_307] (rows=1 width=12) + SHUFFLE [RS_313] + Group By Operator [GBY_311] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_304] (rows=852 width=1910) + Select Operator [SEL_308] (rows=852 width=1910) Output:["_col0"] - Please refer to the previous Select Operator [SEL_302] + Please refer to the previous Select Operator [SEL_306] <-Reducer 23 [BROADCAST_EDGE] vectorized - BROADCAST [RS_324] - Group By Operator [GBY_323] (rows=1 width=12) + BROADCAST [RS_328] + Group By Operator [GBY_327] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"] <-Map 22 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_321] - Group By Operator [GBY_319] (rows=1 width=12) + SHUFFLE [RS_325] + Group By Operator [GBY_323] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=80000000)"] - Select Operator [SEL_316] (rows=80000000 width=860) + Select Operator [SEL_320] (rows=80000000 width=860) Output:["_col0"] - Please refer to the previous Select Operator 
[SEL_314] + Please refer to the previous Select Operator [SEL_318] <-Reducer 26 [BROADCAST_EDGE] vectorized - BROADCAST [RS_342] - Group By Operator [GBY_341] (rows=1 width=12) + BROADCAST [RS_346] + Group By Operator [GBY_345] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] <-Map 25 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_337] - Group By Operator [GBY_333] (rows=1 width=12) + SHUFFLE [RS_341] + Group By Operator [GBY_337] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] - Select Operator [SEL_328] (rows=57591150 width=77) + Select Operator [SEL_332] (rows=57591150 width=77) Output:["_col0"] - Please refer to the previous Select Operator [SEL_326] + Please refer to the previous Select Operator [SEL_330] <-Reducer 27 [BROADCAST_EDGE] vectorized - BROADCAST [RS_344] - Group By Operator [GBY_343] (rows=1 width=12) + BROADCAST [RS_348] + Group By Operator [GBY_347] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] <-Map 25 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_338] - Group By Operator [GBY_334] (rows=1 width=12) + SHUFFLE [RS_342] + Group By Operator [GBY_338] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] - Select Operator [SEL_329] (rows=57591150 width=77) + Select Operator [SEL_333] (rows=57591150 width=77) Output:["_col0"] - Please refer to the previous Select Operator [SEL_326] + Please refer to the previous Select Operator [SEL_330] diff --git a/ql/src/test/results/clientpositive/perf/tez/query25.q.out b/ql/src/test/results/clientpositive/perf/tez/query25.q.out index a885cf344b..77a9a0771b 100644 --- 
a/ql/src/test/results/clientpositive/perf/tez/query25.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query25.q.out @@ -95,7 +95,7 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 19 (BROADCAST_EDGE), Reducer 21 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Map 1 <- Reducer 14 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE), Reducer 19 (BROADCAST_EDGE), Reducer 21 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) Map 16 <- Reducer 12 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE), Reducer 19 (BROADCAST_EDGE) Reducer 10 <- Map 16 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE) @@ -118,189 +118,195 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_259] - Limit [LIM_258] (rows=100 width=88) + File Output Operator [FS_269] + Limit [LIM_268] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_257] (rows=421657640 width=88) + Select Operator [SEL_267] (rows=421657640 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_256] - Group By Operator [GBY_255] (rows=421657640 width=88) + SHUFFLE [RS_266] + Group By Operator [GBY_265] (rows=421657640 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_49] PartitionCols:_col0, _col1, _col2, _col3 Group By Operator [GBY_48] (rows=843315281 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col5)","sum(_col20)","sum(_col12)"],keys:_col25, _col26, _col28, _col29 - Merge Join Operator [MERGEJOIN_205] (rows=843315281 width=88) - Conds:RS_44._col3=RS_234._col0(Inner),Output:["_col5","_col12","_col20","_col25","_col26","_col28","_col29"] + Merge Join Operator [MERGEJOIN_213] 
(rows=843315281 width=88) + Conds:RS_44._col3=RS_251._col0(Inner),Output:["_col5","_col12","_col20","_col25","_col26","_col28","_col29"] <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_234] + SHUFFLE [RS_251] PartitionCols:_col0 - Select Operator [SEL_233] (rows=1704 width=1910) + Select Operator [SEL_250] (rows=1704 width=1910) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_232] (rows=1704 width=1910) + Filter Operator [FIL_249] (rows=1704 width=1910) predicate:s_store_sk is not null TableScan [TS_32] (rows=1704 width=1910) default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_id","s_store_name"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_44] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_204] (rows=766650239 width=88) - Conds:RS_41._col1=RS_225._col0(Inner),Output:["_col3","_col5","_col12","_col20","_col25","_col26"] + Merge Join Operator [MERGEJOIN_212] (rows=766650239 width=88) + Conds:RS_41._col1=RS_242._col0(Inner),Output:["_col3","_col5","_col12","_col20","_col25","_col26"] <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_225] + SHUFFLE [RS_242] PartitionCols:_col0 - Select Operator [SEL_224] (rows=462000 width=1436) + Select Operator [SEL_241] (rows=462000 width=1436) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_223] (rows=462000 width=1436) + Filter Operator [FIL_240] (rows=462000 width=1436) predicate:i_item_sk is not null TableScan [TS_29] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_item_desc"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_41] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_203] (rows=696954748 width=88) + Merge Join Operator [MERGEJOIN_211] (rows=696954748 width=88) Conds:RS_38._col1, _col2, _col4=RS_39._col8, _col9, _col10(Inner),Output:["_col1","_col3","_col5","_col12","_col20"] <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_39] PartitionCols:_col8, _col9, _col10 - Merge Join Operator [MERGEJOIN_202] (rows=348467716 width=135) + Merge Join Operator 
[MERGEJOIN_210] (rows=348467716 width=135) Conds:RS_25._col2, _col1=RS_26._col1, _col2(Inner),Output:["_col3","_col8","_col9","_col10","_col11"] <-Reducer 13 [SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_26] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_201] (rows=63350266 width=77) - Conds:RS_247._col0=RS_216._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_209] (rows=63350266 width=77) + Conds:RS_233._col0=RS_224._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_216] + PARTITION_ONLY_SHUFFLE [RS_224] PartitionCols:_col0 - Select Operator [SEL_211] (rows=4058 width=1119) + Select Operator [SEL_219] (rows=4058 width=1119) Output:["_col0"] - Filter Operator [FIL_208] (rows=4058 width=1119) + Filter Operator [FIL_216] (rows=4058 width=1119) predicate:((d_year = 2000) and d_date_sk is not null and d_moy BETWEEN 4 AND 10) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_247] + SHUFFLE [RS_233] PartitionCols:_col0 - Select Operator [SEL_246] (rows=57591150 width=77) + Select Operator [SEL_232] (rows=57591150 width=77) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_245] (rows=57591150 width=77) + Filter Operator [FIL_231] (rows=57591150 width=77) predicate:(sr_customer_sk is not null and sr_item_sk is not null and sr_returned_date_sk is not null and sr_ticket_number is not null) TableScan [TS_12] (rows=57591150 width=77) default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number","sr_net_loss"] <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_25] PartitionCols:_col2, _col1 - Merge Join Operator [MERGEJOIN_200] (rows=316788826 width=135) - Conds:RS_254._col0=RS_214._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_208] 
(rows=316788826 width=135) + Conds:RS_264._col0=RS_222._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_214] + PARTITION_ONLY_SHUFFLE [RS_222] PartitionCols:_col0 - Select Operator [SEL_210] (rows=4058 width=1119) + Select Operator [SEL_218] (rows=4058 width=1119) Output:["_col0"] - Filter Operator [FIL_207] (rows=4058 width=1119) + Filter Operator [FIL_215] (rows=4058 width=1119) predicate:((d_year = 2000) and d_date_sk is not null and d_moy BETWEEN 4 AND 10) Please refer to the previous TableScan [TS_3] <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_254] + SHUFFLE [RS_264] PartitionCols:_col0 - Select Operator [SEL_253] (rows=287989836 width=135) + Select Operator [SEL_263] (rows=287989836 width=135) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_252] (rows=287989836 width=135) + Filter Operator [FIL_262] (rows=287989836 width=135) predicate:((cs_bill_customer_sk BETWEEN DynamicValue(RS_26_store_returns_sr_customer_sk_min) AND DynamicValue(RS_26_store_returns_sr_customer_sk_max) and in_bloom_filter(cs_bill_customer_sk, DynamicValue(RS_26_store_returns_sr_customer_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_26_store_returns_sr_item_sk_min) AND DynamicValue(RS_26_store_returns_sr_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_26_store_returns_sr_item_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_42_item_i_item_sk_min) AND DynamicValue(RS_42_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_42_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_23_d3_d_date_sk_min) AND DynamicValue(RS_23_d3_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_23_d3_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) TableScan [TS_6] (rows=287989836 width=135) 
default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_net_profit"] - <-Reducer 19 [BROADCAST_EDGE] vectorized - BROADCAST [RS_231] - Group By Operator [GBY_229] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 18 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_228] - Group By Operator [GBY_227] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_226] (rows=462000 width=1436) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_224] - <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_244] - Group By Operator [GBY_243] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_220] - Group By Operator [GBY_218] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_215] (rows=4058 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_210] <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_249] - Group By Operator [GBY_248] (rows=1 width=12) + BROADCAST [RS_236] + Group By Operator [GBY_234] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=63350264)"] <-Reducer 13 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_137] - Group By Operator [GBY_136] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_107] + Group By Operator [GBY_106] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, 
expectedEntries=63350264)"] - Select Operator [SEL_135] (rows=63350266 width=77) + Select Operator [SEL_105] (rows=63350266 width=77) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_201] + Please refer to the previous Merge Join Operator [MERGEJOIN_209] <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_251] - Group By Operator [GBY_250] (rows=1 width=12) + BROADCAST [RS_239] + Group By Operator [GBY_237] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=63350264)"] <-Reducer 13 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_142] - Group By Operator [GBY_141] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_122] + Group By Operator [GBY_121] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=63350264)"] - Select Operator [SEL_140] (rows=63350266 width=77) + Select Operator [SEL_120] (rows=63350266 width=77) + Output:["_col0"] + Please refer to the previous Merge Join Operator [MERGEJOIN_209] + <-Reducer 19 [BROADCAST_EDGE] vectorized + BROADCAST [RS_248] + Group By Operator [GBY_246] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 18 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_245] + Group By Operator [GBY_244] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_243] (rows=462000 width=1436) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_201] + Please refer to the previous Select Operator [SEL_241] + <-Reducer 12 [BROADCAST_EDGE] vectorized + BROADCAST [RS_261] + Group By Operator [GBY_260] (rows=1 width=12) + 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_228] + Group By Operator [GBY_226] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_223] (rows=4058 width=1119) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_218] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_38] PartitionCols:_col1, _col2, _col4 - Merge Join Operator [MERGEJOIN_199] (rows=633595212 width=88) - Conds:RS_242._col0=RS_212._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_207] (rows=633595212 width=88) + Conds:RS_259._col0=RS_220._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_212] + PARTITION_ONLY_SHUFFLE [RS_220] PartitionCols:_col0 - Select Operator [SEL_209] (rows=18262 width=1119) + Select Operator [SEL_217] (rows=18262 width=1119) Output:["_col0"] - Filter Operator [FIL_206] (rows=18262 width=1119) + Filter Operator [FIL_214] (rows=18262 width=1119) predicate:((d_moy = 4) and (d_year = 2000) and d_date_sk is not null) Please refer to the previous TableScan [TS_3] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_242] + SHUFFLE [RS_259] PartitionCols:_col0 - Select Operator [SEL_241] (rows=575995635 width=88) + Select Operator [SEL_258] (rows=575995635 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_240] (rows=575995635 width=88) - predicate:((ss_item_sk BETWEEN DynamicValue(RS_42_item_i_item_sk_min) AND DynamicValue(RS_42_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_42_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_36_d1_d_date_sk_min) AND DynamicValue(RS_36_d1_d_date_sk_max) and 
in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_36_d1_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_45_store_s_store_sk_min) AND DynamicValue(RS_45_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_45_store_s_store_sk_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) + Filter Operator [FIL_257] (rows=575995635 width=88) + predicate:((ss_customer_sk BETWEEN DynamicValue(RS_26_store_returns_sr_customer_sk_min) AND DynamicValue(RS_26_store_returns_sr_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_26_store_returns_sr_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_26_store_returns_sr_item_sk_min) AND DynamicValue(RS_26_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_26_store_returns_sr_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_42_item_i_item_sk_min) AND DynamicValue(RS_42_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_42_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_36_d1_d_date_sk_min) AND DynamicValue(RS_36_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_36_d1_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_45_store_s_store_sk_min) AND DynamicValue(RS_45_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_45_store_s_store_sk_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) TableScan [TS_0] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_net_profit"] + <-Reducer 14 [BROADCAST_EDGE] vectorized + BROADCAST [RS_235] + Please refer to the previous Group By Operator 
[GBY_234] + <-Reducer 15 [BROADCAST_EDGE] vectorized + BROADCAST [RS_238] + Please refer to the previous Group By Operator [GBY_237] <-Reducer 19 [BROADCAST_EDGE] vectorized - BROADCAST [RS_230] - Please refer to the previous Group By Operator [GBY_229] + BROADCAST [RS_247] + Please refer to the previous Group By Operator [GBY_246] <-Reducer 21 [BROADCAST_EDGE] vectorized - BROADCAST [RS_239] - Group By Operator [GBY_238] (rows=1 width=12) + BROADCAST [RS_256] + Group By Operator [GBY_255] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_237] - Group By Operator [GBY_236] (rows=1 width=12) + SHUFFLE [RS_254] + Group By Operator [GBY_253] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_235] (rows=1704 width=1910) + Select Operator [SEL_252] (rows=1704 width=1910) Output:["_col0"] - Please refer to the previous Select Operator [SEL_233] + Please refer to the previous Select Operator [SEL_250] <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_222] - Group By Operator [GBY_221] (rows=1 width=12) + BROADCAST [RS_230] + Group By Operator [GBY_229] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_219] - Group By Operator [GBY_217] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_227] + Group By Operator [GBY_225] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_213] (rows=18262 width=1119) + Select Operator [SEL_221] (rows=18262 width=1119) Output:["_col0"] - Please refer to the previous Select Operator 
[SEL_209] + Please refer to the previous Select Operator [SEL_217] diff --git a/ql/src/test/results/clientpositive/perf/tez/query29.q.out b/ql/src/test/results/clientpositive/perf/tez/query29.q.out index 46ff49d41a..791ddb6e0b 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query29.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query29.q.out @@ -117,116 +117,116 @@ Stage-0 limit:100 Stage-1 Reducer 5 vectorized - File Output Operator [FS_257] - Limit [LIM_256] (rows=100 width=88) + File Output Operator [FS_259] + Limit [LIM_258] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_255] (rows=463823414 width=88) + Select Operator [SEL_257] (rows=463823414 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_254] - Group By Operator [GBY_253] (rows=463823414 width=88) + SHUFFLE [RS_256] + Group By Operator [GBY_255] (rows=463823414 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_49] PartitionCols:_col0, _col1, _col2, _col3 Group By Operator [GBY_48] (rows=927646829 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col14)","sum(_col22)","sum(_col3)"],keys:_col7, _col8, _col27, _col28 - Merge Join Operator [MERGEJOIN_202] (rows=927646829 width=88) + Merge Join Operator [MERGEJOIN_204] (rows=927646829 width=88) Conds:RS_44._col1, _col2=RS_45._col14, _col13(Inner),Output:["_col3","_col7","_col8","_col14","_col22","_col27","_col28"] <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_45] PartitionCols:_col14, _col13 Select Operator [SEL_40] (rows=843315281 width=88) Output:["_col1","_col2","_col8","_col13","_col14","_col16","_col21","_col22"] - Merge Join Operator [MERGEJOIN_201] (rows=843315281 width=88) - 
Conds:RS_37._col3=RS_244._col0(Inner),Output:["_col5","_col10","_col11","_col13","_col18","_col19","_col21","_col22"] + Merge Join Operator [MERGEJOIN_203] (rows=843315281 width=88) + Conds:RS_37._col3=RS_246._col0(Inner),Output:["_col5","_col10","_col11","_col13","_col18","_col19","_col21","_col22"] <-Map 22 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_244] + PARTITION_ONLY_SHUFFLE [RS_246] PartitionCols:_col0 - Select Operator [SEL_243] (rows=1704 width=1910) + Select Operator [SEL_245] (rows=1704 width=1910) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_242] (rows=1704 width=1910) + Filter Operator [FIL_244] (rows=1704 width=1910) predicate:s_store_sk is not null TableScan [TS_25] (rows=1704 width=1910) default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_id","s_store_name"] <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_37] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_200] (rows=766650239 width=88) - Conds:RS_34._col1=RS_236._col0(Inner),Output:["_col3","_col5","_col10","_col11","_col13","_col18","_col19"] + Merge Join Operator [MERGEJOIN_202] (rows=766650239 width=88) + Conds:RS_34._col1=RS_238._col0(Inner),Output:["_col3","_col5","_col10","_col11","_col13","_col18","_col19"] <-Map 20 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_236] + PARTITION_ONLY_SHUFFLE [RS_238] PartitionCols:_col0 - Select Operator [SEL_235] (rows=462000 width=1436) + Select Operator [SEL_237] (rows=462000 width=1436) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_234] (rows=462000 width=1436) + Filter Operator [FIL_236] (rows=462000 width=1436) predicate:i_item_sk is not null TableScan [TS_22] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_item_desc"] <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_34] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_199] (rows=696954748 width=88) + Merge Join Operator [MERGEJOIN_201] (rows=696954748 width=88) Conds:RS_31._col1, _col2, 
_col4=RS_32._col1, _col2, _col3(Inner),Output:["_col1","_col3","_col5","_col10","_col11","_col13"] <-Reducer 15 [SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_32] PartitionCols:_col1, _col2, _col3 - Merge Join Operator [MERGEJOIN_198] (rows=63350266 width=77) - Conds:RS_227._col0=RS_220._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_200] (rows=63350266 width=77) + Conds:RS_229._col0=RS_222._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_220] + SHUFFLE [RS_222] PartitionCols:_col0 - Select Operator [SEL_217] (rows=4058 width=1119) + Select Operator [SEL_219] (rows=4058 width=1119) Output:["_col0"] - Filter Operator [FIL_215] (rows=4058 width=1119) + Filter Operator [FIL_217] (rows=4058 width=1119) predicate:((d_year = 1999) and d_date_sk is not null and d_moy BETWEEN 4 AND 7) TableScan [TS_9] (rows=73049 width=1119) default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] <-Map 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_227] + SHUFFLE [RS_229] PartitionCols:_col0 - Select Operator [SEL_226] (rows=57591150 width=77) + Select Operator [SEL_228] (rows=57591150 width=77) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_225] (rows=57591150 width=77) + Filter Operator [FIL_227] (rows=57591150 width=77) predicate:(sr_customer_sk is not null and sr_item_sk is not null and sr_returned_date_sk is not null and sr_ticket_number is not null) TableScan [TS_12] (rows=57591150 width=77) default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number","sr_return_quantity"] <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_31] PartitionCols:_col1, _col2, _col4 - Merge Join Operator [MERGEJOIN_197] (rows=633595212 width=88) - Conds:RS_252._col0=RS_218._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_199] (rows=633595212 width=88) + 
Conds:RS_254._col0=RS_220._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_218] + SHUFFLE [RS_220] PartitionCols:_col0 - Select Operator [SEL_216] (rows=18262 width=1119) + Select Operator [SEL_218] (rows=18262 width=1119) Output:["_col0"] - Filter Operator [FIL_214] (rows=18262 width=1119) + Filter Operator [FIL_216] (rows=18262 width=1119) predicate:((d_moy = 4) and (d_year = 1999) and d_date_sk is not null) Please refer to the previous TableScan [TS_9] <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_252] + SHUFFLE [RS_254] PartitionCols:_col0 - Select Operator [SEL_251] (rows=575995635 width=88) + Select Operator [SEL_253] (rows=575995635 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_250] (rows=575995635 width=88) + Filter Operator [FIL_252] (rows=575995635 width=88) predicate:((ss_customer_sk BETWEEN DynamicValue(RS_32_store_returns_sr_customer_sk_min) AND DynamicValue(RS_32_store_returns_sr_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_32_store_returns_sr_customer_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_32_store_returns_sr_item_sk_min) AND DynamicValue(RS_32_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_32_store_returns_sr_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_35_item_i_item_sk_min) AND DynamicValue(RS_35_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_35_item_i_item_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_29_d1_d_date_sk_min) AND DynamicValue(RS_29_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_29_d1_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_38_store_s_store_sk_min) AND DynamicValue(RS_38_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_38_store_s_store_sk_bloom_filter))) and (ss_ticket_number BETWEEN 
DynamicValue(RS_32_store_returns_sr_ticket_number_min) AND DynamicValue(RS_32_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_32_store_returns_sr_ticket_number_bloom_filter))) and ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) TableScan [TS_6] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_quantity"] <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_224] - Group By Operator [GBY_223] (rows=1 width=12) + BROADCAST [RS_226] + Group By Operator [GBY_225] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_222] - Group By Operator [GBY_221] (rows=1 width=12) + SHUFFLE [RS_224] + Group By Operator [GBY_223] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_219] (rows=18262 width=1119) + Select Operator [SEL_221] (rows=18262 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_216] + Please refer to the previous Select Operator [SEL_218] <-Reducer 16 [BROADCAST_EDGE] vectorized - BROADCAST [RS_229] - Group By Operator [GBY_228] (rows=1 width=12) + BROADCAST [RS_231] + Group By Operator [GBY_230] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=63350264)"] <-Reducer 15 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_120] @@ -234,10 +234,10 @@ Stage-0 Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=63350264)"] Select Operator [SEL_118] (rows=63350266 
width=77) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_198] + Please refer to the previous Merge Join Operator [MERGEJOIN_200] <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_231] - Group By Operator [GBY_230] (rows=1 width=12) + BROADCAST [RS_233] + Group By Operator [GBY_232] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=63350264)"] <-Reducer 15 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_125] @@ -245,10 +245,10 @@ Stage-0 Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=63350264)"] Select Operator [SEL_123] (rows=63350266 width=77) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_198] + Please refer to the previous Merge Join Operator [MERGEJOIN_200] <-Reducer 18 [BROADCAST_EDGE] vectorized - BROADCAST [RS_233] - Group By Operator [GBY_232] (rows=1 width=12) + BROADCAST [RS_235] + Group By Operator [GBY_234] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=63350264)"] <-Reducer 15 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_130] @@ -256,61 +256,61 @@ Stage-0 Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=63350264)"] Select Operator [SEL_128] (rows=63350266 width=77) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_198] + Please refer to the previous Merge Join Operator [MERGEJOIN_200] <-Reducer 21 [BROADCAST_EDGE] vectorized - BROADCAST [RS_241] - Group By Operator [GBY_240] (rows=1 width=12) + BROADCAST [RS_243] + Group By Operator [GBY_242] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 20 [CUSTOM_SIMPLE_EDGE] vectorized - 
PARTITION_ONLY_SHUFFLE [RS_239] - Group By Operator [GBY_238] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_241] + Group By Operator [GBY_240] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_237] (rows=462000 width=1436) + Select Operator [SEL_239] (rows=462000 width=1436) Output:["_col0"] - Please refer to the previous Select Operator [SEL_235] + Please refer to the previous Select Operator [SEL_237] <-Reducer 23 [BROADCAST_EDGE] vectorized - BROADCAST [RS_249] - Group By Operator [GBY_248] (rows=1 width=12) + BROADCAST [RS_251] + Group By Operator [GBY_250] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 22 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_247] - Group By Operator [GBY_246] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_249] + Group By Operator [GBY_248] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_245] (rows=1704 width=1910) + Select Operator [SEL_247] (rows=1704 width=1910) Output:["_col0"] - Please refer to the previous Select Operator [SEL_243] + Please refer to the previous Select Operator [SEL_245] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_44] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_196] (rows=316788826 width=135) - Conds:RS_213._col0=RS_205._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_198] (rows=316788826 width=135) + Conds:RS_215._col0=RS_207._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 6 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_205] + PARTITION_ONLY_SHUFFLE [RS_207] PartitionCols:_col0 - Select Operator [SEL_204] (rows=36525 width=1119) + Select Operator [SEL_206] (rows=36525 width=1119) Output:["_col0"] - Filter 
Operator [FIL_203] (rows=36525 width=1119) + Filter Operator [FIL_205] (rows=36525 width=1119) predicate:((d_year) IN (1999, 2000, 2001) and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,d3,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_213] + SHUFFLE [RS_215] PartitionCols:_col0 - Select Operator [SEL_212] (rows=287989836 width=135) + Select Operator [SEL_214] (rows=287989836 width=135) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_211] (rows=287989836 width=135) + Filter Operator [FIL_213] (rows=287989836 width=135) predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_42_d3_d_date_sk_min) AND DynamicValue(RS_42_d3_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_42_d3_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) TableScan [TS_0] (rows=287989836 width=135) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_quantity"] <-Reducer 7 [BROADCAST_EDGE] vectorized - BROADCAST [RS_210] - Group By Operator [GBY_209] (rows=1 width=12) + BROADCAST [RS_212] + Group By Operator [GBY_211] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_208] - Group By Operator [GBY_207] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_210] + Group By Operator [GBY_209] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_206] (rows=36525 width=1119) + Select Operator [SEL_208] (rows=36525 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_204] + Please refer to the previous Select Operator [SEL_206] diff --git 
a/ql/src/test/results/clientpositive/perf/tez/query31.q.out b/ql/src/test/results/clientpositive/perf/tez/query31.q.out index c4d717d868..22aee37ca4 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query31.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query31.q.out @@ -152,377 +152,377 @@ Stage-0 Output:["_col0","_col1","_col2","_col3","_col4","_col5"] Filter Operator [FIL_130] (rows=287493839 width=88) predicate:(CASE WHEN ((_col1 > 0)) THEN (CASE WHEN ((_col9 > 0)) THEN (((_col11 / _col9) > (_col5 / _col1))) ELSE ((null > (_col5 / _col1))) END) ELSE (CASE WHEN ((_col9 > 0)) THEN (((_col11 / _col9) > null)) ELSE (null) END) END and CASE WHEN ((_col3 > 0)) THEN (CASE WHEN ((_col7 > 0)) THEN (((_col9 / _col7) > (_col1 / _col3))) ELSE ((null > (_col1 / _col3))) END) ELSE (CASE WHEN ((_col7 > 0)) THEN (((_col9 / _col7) > null)) ELSE (null) END) END) - Merge Join Operator [MERGEJOIN_439] (rows=1149975359 width=88) - Conds:RS_510._col0=RS_519._col0(Inner),RS_510._col0=RS_528._col0(Inner),RS_510._col0=RS_128._col0(Inner),Output:["_col0","_col1","_col3","_col5","_col7","_col9","_col11"] + Merge Join Operator [MERGEJOIN_448] (rows=1149975359 width=88) + Conds:RS_519._col0=RS_528._col0(Inner),RS_519._col0=RS_537._col0(Inner),RS_519._col0=RS_128._col0(Inner),Output:["_col0","_col1","_col3","_col5","_col7","_col9","_col11"] <-Reducer 10 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_519] + FORWARD [RS_528] PartitionCols:_col0 - Group By Operator [GBY_518] (rows=348477374 width=88) + Group By Operator [GBY_527] (rows=348477374 width=88) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_37] PartitionCols:_col0 Group By Operator [GBY_36] (rows=696954748 width=88) Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col7 - Merge Join Operator [MERGEJOIN_429] (rows=696954748 width=88) - Conds:RS_32._col1=RS_482._col0(Inner),Output:["_col2","_col7"] + Merge Join Operator [MERGEJOIN_438] 
(rows=696954748 width=88) + Conds:RS_32._col1=RS_491._col0(Inner),Output:["_col2","_col7"] <-Map 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_482] + SHUFFLE [RS_491] PartitionCols:_col0 - Select Operator [SEL_479] (rows=40000000 width=1014) + Select Operator [SEL_488] (rows=40000000 width=1014) Output:["_col0","_col1"] - Filter Operator [FIL_478] (rows=40000000 width=1014) + Filter Operator [FIL_487] (rows=40000000 width=1014) predicate:(ca_address_sk is not null and ca_county is not null) TableScan [TS_6] (rows=40000000 width=1014) default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_county"] <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_32] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_428] (rows=633595212 width=88) - Conds:RS_517._col0=RS_454._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_437] (rows=633595212 width=88) + Conds:RS_526._col0=RS_463._col0(Inner),Output:["_col1","_col2"] <-Map 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_454] + SHUFFLE [RS_463] PartitionCols:_col0 - Select Operator [SEL_447] (rows=18262 width=1119) + Select Operator [SEL_456] (rows=18262 width=1119) Output:["_col0"] - Filter Operator [FIL_441] (rows=18262 width=1119) + Filter Operator [FIL_450] (rows=18262 width=1119) predicate:((d_qoy = 1) and (d_year = 2000) and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_qoy"] <-Map 36 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_517] + SHUFFLE [RS_526] PartitionCols:_col0 - Select Operator [SEL_516] (rows=575995635 width=88) + Select Operator [SEL_525] (rows=575995635 width=88) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_515] (rows=575995635 width=88) + Filter Operator [FIL_524] (rows=575995635 width=88) predicate:((ss_addr_sk BETWEEN DynamicValue(RS_33_customer_address_ca_address_sk_min) AND DynamicValue(RS_33_customer_address_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, 
DynamicValue(RS_33_customer_address_ca_address_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_30_date_dim_d_date_sk_min) AND DynamicValue(RS_30_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_30_date_dim_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_sold_date_sk is not null) TableScan [TS_20] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_addr_sk","ss_ext_sales_price"] <-Reducer 11 [BROADCAST_EDGE] vectorized - BROADCAST [RS_512] - Group By Operator [GBY_511] (rows=1 width=12) + BROADCAST [RS_521] + Group By Operator [GBY_520] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_471] - Group By Operator [GBY_465] (rows=1 width=12) + SHUFFLE [RS_480] + Group By Operator [GBY_474] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_455] (rows=18262 width=1119) + Select Operator [SEL_464] (rows=18262 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_447] + Please refer to the previous Select Operator [SEL_456] <-Reducer 31 [BROADCAST_EDGE] vectorized - BROADCAST [RS_514] - Group By Operator [GBY_513] (rows=1 width=12) + BROADCAST [RS_523] + Group By Operator [GBY_522] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=40000000)"] <-Map 29 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_499] - Group By Operator [GBY_493] (rows=1 width=12) + SHUFFLE [RS_508] + Group By Operator [GBY_502] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=40000000)"] - Select Operator [SEL_483] 
(rows=40000000 width=1014) + Select Operator [SEL_492] (rows=40000000 width=1014) Output:["_col0"] - Please refer to the previous Select Operator [SEL_479] + Please refer to the previous Select Operator [SEL_488] <-Reducer 14 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_528] + FORWARD [RS_537] PartitionCols:_col0 - Group By Operator [GBY_527] (rows=348477374 width=88) + Group By Operator [GBY_536] (rows=348477374 width=88) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_57] PartitionCols:_col0 Group By Operator [GBY_56] (rows=696954748 width=88) Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col7 - Merge Join Operator [MERGEJOIN_431] (rows=696954748 width=88) - Conds:RS_52._col1=RS_484._col0(Inner),Output:["_col2","_col7"] + Merge Join Operator [MERGEJOIN_440] (rows=696954748 width=88) + Conds:RS_52._col1=RS_493._col0(Inner),Output:["_col2","_col7"] <-Map 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_484] + SHUFFLE [RS_493] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_479] + Please refer to the previous Select Operator [SEL_488] <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_52] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_430] (rows=633595212 width=88) - Conds:RS_526._col0=RS_456._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_439] (rows=633595212 width=88) + Conds:RS_535._col0=RS_465._col0(Inner),Output:["_col1","_col2"] <-Map 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_456] + SHUFFLE [RS_465] PartitionCols:_col0 - Select Operator [SEL_448] (rows=18262 width=1119) + Select Operator [SEL_457] (rows=18262 width=1119) Output:["_col0"] - Filter Operator [FIL_442] (rows=18262 width=1119) + Filter Operator [FIL_451] (rows=18262 width=1119) predicate:((d_qoy = 3) and (d_year = 2000) and d_date_sk is not null) Please refer to the previous TableScan [TS_3] <-Map 37 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_526] + SHUFFLE [RS_535] PartitionCols:_col0 - Select 
Operator [SEL_525] (rows=575995635 width=88) + Select Operator [SEL_534] (rows=575995635 width=88) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_524] (rows=575995635 width=88) + Filter Operator [FIL_533] (rows=575995635 width=88) predicate:((ss_addr_sk BETWEEN DynamicValue(RS_53_customer_address_ca_address_sk_min) AND DynamicValue(RS_53_customer_address_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, DynamicValue(RS_53_customer_address_ca_address_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_50_date_dim_d_date_sk_min) AND DynamicValue(RS_50_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_50_date_dim_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_sold_date_sk is not null) TableScan [TS_40] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_addr_sk","ss_ext_sales_price"] <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_521] - Group By Operator [GBY_520] (rows=1 width=12) + BROADCAST [RS_530] + Group By Operator [GBY_529] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_472] - Group By Operator [GBY_466] (rows=1 width=12) + SHUFFLE [RS_481] + Group By Operator [GBY_475] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_457] (rows=18262 width=1119) + Select Operator [SEL_466] (rows=18262 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_448] + Please refer to the previous Select Operator [SEL_457] <-Reducer 32 [BROADCAST_EDGE] vectorized - BROADCAST [RS_523] - Group By Operator [GBY_522] (rows=1 width=12) + BROADCAST [RS_532] + Group By Operator [GBY_531] (rows=1 width=12) 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=40000000)"] <-Map 29 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_500] - Group By Operator [GBY_494] (rows=1 width=12) + SHUFFLE [RS_509] + Group By Operator [GBY_503] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=40000000)"] - Select Operator [SEL_485] (rows=40000000 width=1014) + Select Operator [SEL_494] (rows=40000000 width=1014) Output:["_col0"] - Please refer to the previous Select Operator [SEL_479] + Please refer to the previous Select Operator [SEL_488] <-Reducer 19 [ONE_TO_ONE_EDGE] FORWARD [RS_128] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_438] (rows=191667561 width=135) - Conds:RS_537._col0=RS_546._col0(Inner),RS_537._col0=RS_555._col0(Inner),Output:["_col0","_col1","_col3","_col5"] + Merge Join Operator [MERGEJOIN_447] (rows=191667561 width=135) + Conds:RS_546._col0=RS_555._col0(Inner),RS_546._col0=RS_564._col0(Inner),Output:["_col0","_col1","_col3","_col5"] <-Reducer 18 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_537] + FORWARD [RS_546] PartitionCols:_col0 - Group By Operator [GBY_536] (rows=87121617 width=135) + Group By Operator [GBY_545] (rows=87121617 width=135) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 17 [SIMPLE_EDGE] SHUFFLE [RS_77] PartitionCols:_col0 Group By Operator [GBY_76] (rows=174243235 width=135) Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col7 - Merge Join Operator [MERGEJOIN_433] (rows=174243235 width=135) - Conds:RS_72._col1=RS_486._col0(Inner),Output:["_col2","_col7"] + Merge Join Operator [MERGEJOIN_442] (rows=174243235 width=135) + Conds:RS_72._col1=RS_495._col0(Inner),Output:["_col2","_col7"] <-Map 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_486] + SHUFFLE [RS_495] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_479] + Please refer to 
the previous Select Operator [SEL_488] <-Reducer 16 [SIMPLE_EDGE] SHUFFLE [RS_72] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_432] (rows=158402938 width=135) - Conds:RS_535._col0=RS_458._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_441] (rows=158402938 width=135) + Conds:RS_544._col0=RS_467._col0(Inner),Output:["_col1","_col2"] <-Map 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_458] + SHUFFLE [RS_467] PartitionCols:_col0 - Select Operator [SEL_449] (rows=18262 width=1119) + Select Operator [SEL_458] (rows=18262 width=1119) Output:["_col0"] - Filter Operator [FIL_443] (rows=18262 width=1119) + Filter Operator [FIL_452] (rows=18262 width=1119) predicate:((d_qoy = 1) and (d_year = 2000) and d_date_sk is not null) Please refer to the previous TableScan [TS_3] <-Map 38 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_535] + SHUFFLE [RS_544] PartitionCols:_col0 - Select Operator [SEL_534] (rows=144002668 width=135) + Select Operator [SEL_543] (rows=144002668 width=135) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_533] (rows=144002668 width=135) + Filter Operator [FIL_542] (rows=144002668 width=135) predicate:((ws_bill_addr_sk BETWEEN DynamicValue(RS_73_customer_address_ca_address_sk_min) AND DynamicValue(RS_73_customer_address_ca_address_sk_max) and in_bloom_filter(ws_bill_addr_sk, DynamicValue(RS_73_customer_address_ca_address_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_70_date_dim_d_date_sk_min) AND DynamicValue(RS_70_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_70_date_dim_d_date_sk_bloom_filter))) and ws_bill_addr_sk is not null and ws_sold_date_sk is not null) TableScan [TS_60] (rows=144002668 width=135) default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_addr_sk","ws_ext_sales_price"] <-Reducer 20 [BROADCAST_EDGE] vectorized - BROADCAST [RS_530] - Group By Operator [GBY_529] (rows=1 width=12) + BROADCAST [RS_539] + Group By Operator [GBY_538] (rows=1 
width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_473] - Group By Operator [GBY_467] (rows=1 width=12) + SHUFFLE [RS_482] + Group By Operator [GBY_476] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_459] (rows=18262 width=1119) + Select Operator [SEL_468] (rows=18262 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_449] + Please refer to the previous Select Operator [SEL_458] <-Reducer 33 [BROADCAST_EDGE] vectorized - BROADCAST [RS_532] - Group By Operator [GBY_531] (rows=1 width=12) + BROADCAST [RS_541] + Group By Operator [GBY_540] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=40000000)"] <-Map 29 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_501] - Group By Operator [GBY_495] (rows=1 width=12) + SHUFFLE [RS_510] + Group By Operator [GBY_504] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=40000000)"] - Select Operator [SEL_487] (rows=40000000 width=1014) + Select Operator [SEL_496] (rows=40000000 width=1014) Output:["_col0"] - Please refer to the previous Select Operator [SEL_479] + Please refer to the previous Select Operator [SEL_488] <-Reducer 23 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_546] + FORWARD [RS_555] PartitionCols:_col0 - Group By Operator [GBY_545] (rows=87121617 width=135) + Group By Operator [GBY_554] (rows=87121617 width=135) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 22 [SIMPLE_EDGE] SHUFFLE [RS_97] PartitionCols:_col0 Group By Operator [GBY_96] (rows=174243235 width=135) 
Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col7 - Merge Join Operator [MERGEJOIN_435] (rows=174243235 width=135) - Conds:RS_92._col1=RS_488._col0(Inner),Output:["_col2","_col7"] + Merge Join Operator [MERGEJOIN_444] (rows=174243235 width=135) + Conds:RS_92._col1=RS_497._col0(Inner),Output:["_col2","_col7"] <-Map 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_488] + SHUFFLE [RS_497] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_479] + Please refer to the previous Select Operator [SEL_488] <-Reducer 21 [SIMPLE_EDGE] SHUFFLE [RS_92] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_434] (rows=158402938 width=135) - Conds:RS_544._col0=RS_460._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_443] (rows=158402938 width=135) + Conds:RS_553._col0=RS_469._col0(Inner),Output:["_col1","_col2"] <-Map 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_460] + SHUFFLE [RS_469] PartitionCols:_col0 - Select Operator [SEL_450] (rows=18262 width=1119) + Select Operator [SEL_459] (rows=18262 width=1119) Output:["_col0"] - Filter Operator [FIL_444] (rows=18262 width=1119) + Filter Operator [FIL_453] (rows=18262 width=1119) predicate:((d_qoy = 2) and (d_year = 2000) and d_date_sk is not null) Please refer to the previous TableScan [TS_3] <-Map 39 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_544] + SHUFFLE [RS_553] PartitionCols:_col0 - Select Operator [SEL_543] (rows=144002668 width=135) + Select Operator [SEL_552] (rows=144002668 width=135) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_542] (rows=144002668 width=135) + Filter Operator [FIL_551] (rows=144002668 width=135) predicate:((ws_bill_addr_sk BETWEEN DynamicValue(RS_93_customer_address_ca_address_sk_min) AND DynamicValue(RS_93_customer_address_ca_address_sk_max) and in_bloom_filter(ws_bill_addr_sk, DynamicValue(RS_93_customer_address_ca_address_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_90_date_dim_d_date_sk_min) AND 
DynamicValue(RS_90_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_90_date_dim_d_date_sk_bloom_filter))) and ws_bill_addr_sk is not null and ws_sold_date_sk is not null) TableScan [TS_80] (rows=144002668 width=135) default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_addr_sk","ws_ext_sales_price"] <-Reducer 24 [BROADCAST_EDGE] vectorized - BROADCAST [RS_539] - Group By Operator [GBY_538] (rows=1 width=12) + BROADCAST [RS_548] + Group By Operator [GBY_547] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_474] - Group By Operator [GBY_468] (rows=1 width=12) + SHUFFLE [RS_483] + Group By Operator [GBY_477] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_461] (rows=18262 width=1119) + Select Operator [SEL_470] (rows=18262 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_450] + Please refer to the previous Select Operator [SEL_459] <-Reducer 34 [BROADCAST_EDGE] vectorized - BROADCAST [RS_541] - Group By Operator [GBY_540] (rows=1 width=12) + BROADCAST [RS_550] + Group By Operator [GBY_549] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=40000000)"] <-Map 29 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_502] - Group By Operator [GBY_496] (rows=1 width=12) + SHUFFLE [RS_511] + Group By Operator [GBY_505] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=40000000)"] - Select Operator [SEL_489] (rows=40000000 width=1014) + Select Operator [SEL_498] (rows=40000000 width=1014) Output:["_col0"] - Please refer to the previous Select Operator 
[SEL_479] + Please refer to the previous Select Operator [SEL_488] <-Reducer 27 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_555] + FORWARD [RS_564] PartitionCols:_col0 - Group By Operator [GBY_554] (rows=87121617 width=135) + Group By Operator [GBY_563] (rows=87121617 width=135) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 26 [SIMPLE_EDGE] SHUFFLE [RS_117] PartitionCols:_col0 Group By Operator [GBY_116] (rows=174243235 width=135) Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col7 - Merge Join Operator [MERGEJOIN_437] (rows=174243235 width=135) - Conds:RS_112._col1=RS_490._col0(Inner),Output:["_col2","_col7"] + Merge Join Operator [MERGEJOIN_446] (rows=174243235 width=135) + Conds:RS_112._col1=RS_499._col0(Inner),Output:["_col2","_col7"] <-Map 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_490] + SHUFFLE [RS_499] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_479] + Please refer to the previous Select Operator [SEL_488] <-Reducer 25 [SIMPLE_EDGE] SHUFFLE [RS_112] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_436] (rows=158402938 width=135) - Conds:RS_553._col0=RS_462._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_445] (rows=158402938 width=135) + Conds:RS_562._col0=RS_471._col0(Inner),Output:["_col1","_col2"] <-Map 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_462] + SHUFFLE [RS_471] PartitionCols:_col0 - Select Operator [SEL_451] (rows=18262 width=1119) + Select Operator [SEL_460] (rows=18262 width=1119) Output:["_col0"] - Filter Operator [FIL_445] (rows=18262 width=1119) + Filter Operator [FIL_454] (rows=18262 width=1119) predicate:((d_qoy = 3) and (d_year = 2000) and d_date_sk is not null) Please refer to the previous TableScan [TS_3] <-Map 40 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_553] + SHUFFLE [RS_562] PartitionCols:_col0 - Select Operator [SEL_552] (rows=144002668 width=135) + Select Operator [SEL_561] (rows=144002668 width=135) Output:["_col0","_col1","_col2"] - 
Filter Operator [FIL_551] (rows=144002668 width=135) + Filter Operator [FIL_560] (rows=144002668 width=135) predicate:((ws_bill_addr_sk BETWEEN DynamicValue(RS_113_customer_address_ca_address_sk_min) AND DynamicValue(RS_113_customer_address_ca_address_sk_max) and in_bloom_filter(ws_bill_addr_sk, DynamicValue(RS_113_customer_address_ca_address_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_110_date_dim_d_date_sk_min) AND DynamicValue(RS_110_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_110_date_dim_d_date_sk_bloom_filter))) and ws_bill_addr_sk is not null and ws_sold_date_sk is not null) TableScan [TS_100] (rows=144002668 width=135) default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_addr_sk","ws_ext_sales_price"] <-Reducer 28 [BROADCAST_EDGE] vectorized - BROADCAST [RS_548] - Group By Operator [GBY_547] (rows=1 width=12) + BROADCAST [RS_557] + Group By Operator [GBY_556] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_475] - Group By Operator [GBY_469] (rows=1 width=12) + SHUFFLE [RS_484] + Group By Operator [GBY_478] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_463] (rows=18262 width=1119) + Select Operator [SEL_472] (rows=18262 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_451] + Please refer to the previous Select Operator [SEL_460] <-Reducer 35 [BROADCAST_EDGE] vectorized - BROADCAST [RS_550] - Group By Operator [GBY_549] (rows=1 width=12) + BROADCAST [RS_559] + Group By Operator [GBY_558] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=40000000)"] <-Map 29 
[CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_503] - Group By Operator [GBY_497] (rows=1 width=12) + SHUFFLE [RS_512] + Group By Operator [GBY_506] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=40000000)"] - Select Operator [SEL_491] (rows=40000000 width=1014) + Select Operator [SEL_500] (rows=40000000 width=1014) Output:["_col0"] - Please refer to the previous Select Operator [SEL_479] + Please refer to the previous Select Operator [SEL_488] <-Reducer 4 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_510] + FORWARD [RS_519] PartitionCols:_col0 - Group By Operator [GBY_509] (rows=348477374 width=88) + Group By Operator [GBY_518] (rows=348477374 width=88) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_17] PartitionCols:_col0 Group By Operator [GBY_16] (rows=696954748 width=88) Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col7 - Merge Join Operator [MERGEJOIN_427] (rows=696954748 width=88) - Conds:RS_12._col1=RS_480._col0(Inner),Output:["_col2","_col7"] + Merge Join Operator [MERGEJOIN_436] (rows=696954748 width=88) + Conds:RS_12._col1=RS_489._col0(Inner),Output:["_col2","_col7"] <-Map 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_480] + SHUFFLE [RS_489] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_479] + Please refer to the previous Select Operator [SEL_488] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_12] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_426] (rows=633595212 width=88) - Conds:RS_508._col0=RS_452._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_435] (rows=633595212 width=88) + Conds:RS_517._col0=RS_461._col0(Inner),Output:["_col1","_col2"] <-Map 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_452] + SHUFFLE [RS_461] PartitionCols:_col0 - Select Operator [SEL_446] (rows=18262 width=1119) + Select Operator [SEL_455] (rows=18262 width=1119) Output:["_col0"] - Filter 
Operator [FIL_440] (rows=18262 width=1119) + Filter Operator [FIL_449] (rows=18262 width=1119) predicate:((d_qoy = 2) and (d_year = 2000) and d_date_sk is not null) Please refer to the previous TableScan [TS_3] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_508] + SHUFFLE [RS_517] PartitionCols:_col0 - Select Operator [SEL_507] (rows=575995635 width=88) + Select Operator [SEL_516] (rows=575995635 width=88) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_506] (rows=575995635 width=88) + Filter Operator [FIL_515] (rows=575995635 width=88) predicate:((ss_addr_sk BETWEEN DynamicValue(RS_13_customer_address_ca_address_sk_min) AND DynamicValue(RS_13_customer_address_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, DynamicValue(RS_13_customer_address_ca_address_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) and ss_addr_sk is not null and ss_sold_date_sk is not null) TableScan [TS_0] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_addr_sk","ss_ext_sales_price"] <-Reducer 30 [BROADCAST_EDGE] vectorized - BROADCAST [RS_505] - Group By Operator [GBY_504] (rows=1 width=12) + BROADCAST [RS_514] + Group By Operator [GBY_513] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=40000000)"] <-Map 29 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_498] - Group By Operator [GBY_492] (rows=1 width=12) + SHUFFLE [RS_507] + Group By Operator [GBY_501] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=40000000)"] - Select Operator [SEL_481] (rows=40000000 width=1014) + Select Operator [SEL_490] (rows=40000000 width=1014) Output:["_col0"] - Please refer to the previous 
Select Operator [SEL_479] + Please refer to the previous Select Operator [SEL_488] <-Reducer 7 [BROADCAST_EDGE] vectorized - BROADCAST [RS_477] - Group By Operator [GBY_476] (rows=1 width=12) + BROADCAST [RS_486] + Group By Operator [GBY_485] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_470] - Group By Operator [GBY_464] (rows=1 width=12) + SHUFFLE [RS_479] + Group By Operator [GBY_473] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_453] (rows=18262 width=1119) + Select Operator [SEL_462] (rows=18262 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_446] + Please refer to the previous Select Operator [SEL_455] diff --git a/ql/src/test/results/clientpositive/perf/tez/query32.q.out b/ql/src/test/results/clientpositive/perf/tez/query32.q.out index 6be6f7aa6e..2a472c1f55 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query32.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query32.q.out @@ -55,27 +55,24 @@ POSTHOOK: type: QUERY Plan optimized by CBO. 
Vertex dependency in root stage -Map 1 <- Reducer 6 (BROADCAST_EDGE) -Map 11 <- Reducer 10 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE) -Reducer 10 <- Map 5 (CUSTOM_SIMPLE_EDGE) -Reducer 13 <- Map 12 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (ONE_TO_ONE_EDGE) +Map 1 <- Reducer 10 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) +Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) -Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) -Reducer 7 <- Map 11 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) -Reducer 8 <- Reducer 7 (SIMPLE_EDGE) -Reducer 9 <- Map 12 (SIMPLE_EDGE), Reducer 8 (ONE_TO_ONE_EDGE) +Reducer 5 <- Reducer 2 (SIMPLE_EDGE) +Reducer 6 <- Map 9 (SIMPLE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE) +Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 Reducer 4 vectorized - File Output Operator [FS_136] - Limit [LIM_135] (rows=1 width=112) + File Output Operator [FS_129] + Limit [LIM_128] (rows=1 width=112) Number of rows:100 - Group By Operator [GBY_134] (rows=1 width=112) + Group By Operator [GBY_127] (rows=1 width=112) Output:["_col0"],aggregations:["sum(VALUE._col0)"] <-Reducer 3 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_36] @@ -85,103 +82,78 @@ Stage-0 Output:["_col2"] Filter Operator [FIL_33] (rows=116155905 width=135) predicate:(_col2 > CAST( (1.3 * _col6) AS decimal(14,7))) - Merge Join Operator [MERGEJOIN_102] (rows=348467716 width=135) + Merge Join Operator [MERGEJOIN_104] (rows=348467716 width=135) Conds:RS_30._col1=RS_31._col2(Inner),Output:["_col2","_col6"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_30] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_99] (rows=316788826 width=135) - Conds:RS_117._col0=RS_105._col0(Inner),Output:["_col1","_col2"] - <-Map 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_105] + Merge Join 
Operator [MERGEJOIN_101] (rows=316788826 width=135) + Conds:RS_123._col0=RS_107._col0(Inner),Output:["_col1","_col2"] + <-Map 7 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_107] PartitionCols:_col0 - Select Operator [SEL_104] (rows=8116 width=1119) + Select Operator [SEL_106] (rows=8116 width=1119) Output:["_col0"] - Filter Operator [FIL_103] (rows=8116 width=1119) + Filter Operator [FIL_105] (rows=8116 width=1119) predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-18 00:00:00' AND TIMESTAMP'1998-06-16 00:00:00' and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_117] + SHUFFLE [RS_123] PartitionCols:_col0 - Select Operator [SEL_116] (rows=287989836 width=135) + Select Operator [SEL_122] (rows=287989836 width=135) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_115] (rows=287989836 width=135) - predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_28_date_dim_d_date_sk_min) AND DynamicValue(RS_28_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_28_date_dim_d_date_sk_bloom_filter))) and cs_item_sk is not null and cs_sold_date_sk is not null) + Filter Operator [FIL_121] (rows=287989836 width=135) + predicate:((cs_item_sk BETWEEN DynamicValue(RS_24_item_i_item_sk_min) AND DynamicValue(RS_24_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_24_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_28_date_dim_d_date_sk_min) AND DynamicValue(RS_28_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_28_date_dim_d_date_sk_bloom_filter))) and cs_item_sk is not null and cs_sold_date_sk is not null) TableScan [TS_0] (rows=287989836 width=135) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_item_sk","cs_ext_discount_amt"] - <-Reducer 6 [BROADCAST_EDGE] vectorized - BROADCAST 
[RS_114] - Group By Operator [GBY_113] (rows=1 width=12) + <-Reducer 10 [BROADCAST_EDGE] vectorized + BROADCAST [RS_120] + Group By Operator [GBY_119] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 5 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_111] + <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_118] + Group By Operator [GBY_117] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_116] (rows=231000 width=1436) + Output:["_col0"] + Select Operator [SEL_114] (rows=231000 width=1436) + Output:["_col0"] + Filter Operator [FIL_113] (rows=231000 width=1436) + predicate:((i_manufact_id = 269) and i_item_sk is not null) + TableScan [TS_20] (rows=462000 width=1436) + default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_manufact_id"] + <-Reducer 8 [BROADCAST_EDGE] vectorized + BROADCAST [RS_112] + Group By Operator [GBY_111] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 7 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_110] Group By Operator [GBY_109] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_106] (rows=8116 width=1119) + Select Operator [SEL_108] (rows=8116 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_104] - <-Reducer 9 [ONE_TO_ONE_EDGE] + Please refer to the previous Select Operator [SEL_106] + <-Reducer 6 [ONE_TO_ONE_EDGE] FORWARD [RS_31] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_101] (rows=174233858 width=135) - Conds:RS_133._col0=RS_122._col0(Inner),Output:["_col1","_col2"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_122] + 
Merge Join Operator [MERGEJOIN_103] (rows=174233858 width=135) + Conds:RS_126._col0=RS_115._col0(Inner),Output:["_col1","_col2"] + <-Map 9 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_115] PartitionCols:_col0 - Select Operator [SEL_121] (rows=231000 width=1436) - Output:["_col0"] - Filter Operator [FIL_120] (rows=231000 width=1436) - predicate:((i_manufact_id = 269) and i_item_sk is not null) - TableScan [TS_20] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_manufact_id"] - <-Reducer 8 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_133] + Please refer to the previous Select Operator [SEL_114] + <-Reducer 5 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_126] PartitionCols:_col0 - Select Operator [SEL_132] (rows=158394413 width=135) + Select Operator [SEL_125] (rows=158394413 width=135) Output:["_col0","_col1"] - Group By Operator [GBY_131] (rows=158394413 width=135) + Group By Operator [GBY_124] (rows=158394413 width=135) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Reducer 7 [SIMPLE_EDGE] + <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_17] PartitionCols:_col0 Group By Operator [GBY_16] (rows=316788826 width=135) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","count(_col2)"],keys:_col1 - Merge Join Operator [MERGEJOIN_100] (rows=316788826 width=135) - Conds:RS_130._col0=RS_107._col0(Inner),Output:["_col1","_col2"] - <-Map 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_107] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_104] - <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_130] - PartitionCols:_col0 - Select Operator [SEL_129] (rows=287989836 width=135) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_128] (rows=287989836 width=135) - predicate:((cs_item_sk BETWEEN DynamicValue(RS_24_item_i_item_sk_min) AND DynamicValue(RS_24_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_24_item_i_item_sk_bloom_filter))) and 
(cs_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) and cs_item_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_6] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_item_sk","cs_ext_discount_amt"] - <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_119] - Group By Operator [GBY_118] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 5 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_112] - Group By Operator [GBY_110] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_108] (rows=8116 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_104] - <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_127] - Group By Operator [GBY_126] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_125] - Group By Operator [GBY_124] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_123] (rows=231000 width=1436) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_121] + Please refer to the previous Merge Join Operator [MERGEJOIN_101] diff --git a/ql/src/test/results/clientpositive/perf/tez/query39.q.out b/ql/src/test/results/clientpositive/perf/tez/query39.q.out index 5966e243ea..514f5d4535 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query39.q.out +++ 
b/ql/src/test/results/clientpositive/perf/tez/query39.q.out @@ -69,23 +69,23 @@ Stage-0 limit:-1 Stage-1 Reducer 7 vectorized - File Output Operator [FS_220] - Select Operator [SEL_219] (rows=13756683 width=15) + File Output Operator [FS_232] + Select Operator [SEL_231] (rows=13756683 width=15) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_60] - Merge Join Operator [MERGEJOIN_190] (rows=13756683 width=15) - Conds:RS_213._col0, _col1=RS_218._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_202] (rows=13756683 width=15) + Conds:RS_225._col0, _col1=RS_230._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Reducer 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_218] + SHUFFLE [RS_230] PartitionCols:_col0, _col1 - Select Operator [SEL_217] (rows=12506076 width=15) + Select Operator [SEL_229] (rows=12506076 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_216] (rows=12506076 width=15) + Filter Operator [FIL_228] (rows=12506076 width=15) predicate:CASE WHEN (((_col3 / _col4) = 0)) THEN (false) ELSE (((power(((_col5 - ((_col6 * _col6) / _col4)) / CASE WHEN ((_col4 = 1L)) THEN (null) ELSE ((_col4 - 1)) END), 0.5) / (_col3 / _col4)) > 1.0D)) END - Select Operator [SEL_215] (rows=25012152 width=15) + Select Operator [SEL_227] (rows=25012152 width=15) Output:["_col0","_col1","_col3","_col4","_col5","_col6"] - Group By Operator [GBY_214] (rows=25012152 width=15) + Group By Operator [GBY_226] (rows=25012152 width=15) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_52] @@ -94,64 +94,64 @@ Stage-0 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col3)","count(_col3)","sum(_col5)","sum(_col4)"],keys:_col1, _col2, _col0 Select Operator [SEL_49] (rows=50024305 width=15) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_189] (rows=50024305 width=15) - Conds:RS_46._col2=RS_208._col0(Inner),Output:["_col3","_col7","_col8","_col9"] + Merge Join Operator [MERGEJOIN_201] (rows=50024305 width=15) + Conds:RS_46._col2=RS_220._col0(Inner),Output:["_col3","_col7","_col8","_col9"] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_208] + SHUFFLE [RS_220] PartitionCols:_col0 - Select Operator [SEL_206] (rows=27 width=1029) + Select Operator [SEL_218] (rows=27 width=1029) Output:["_col0","_col1"] - Filter Operator [FIL_205] (rows=27 width=1029) + Filter Operator [FIL_217] (rows=27 width=1029) predicate:w_warehouse_sk is not null TableScan [TS_9] (rows=27 width=1029) default@warehouse,warehouse,Tbl:COMPLETE,Col:NONE,Output:["w_warehouse_sk","w_warehouse_name"] <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_46] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_188] (rows=45476640 width=15) - Conds:RS_43._col1=RS_204._col0(Inner),Output:["_col2","_col3","_col7"] + Merge Join Operator [MERGEJOIN_200] (rows=45476640 width=15) + Conds:RS_43._col1=RS_216._col0(Inner),Output:["_col2","_col3","_col7"] <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_204] + SHUFFLE [RS_216] PartitionCols:_col0 - Select Operator [SEL_202] (rows=462000 width=1436) + Select Operator [SEL_214] (rows=462000 width=1436) Output:["_col0"] - Filter Operator [FIL_201] (rows=462000 width=1436) + Filter Operator [FIL_213] (rows=462000 width=1436) predicate:i_item_sk is not null TableScan [TS_6] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk"] <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_43] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_187] (rows=41342400 width=15) - 
Conds:RS_194._col0=RS_200._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_199] (rows=41342400 width=15) + Conds:RS_206._col0=RS_212._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_194] + SHUFFLE [RS_206] PartitionCols:_col0 - Select Operator [SEL_192] (rows=37584000 width=15) + Select Operator [SEL_204] (rows=37584000 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_191] (rows=37584000 width=15) + Filter Operator [FIL_203] (rows=37584000 width=15) predicate:(inv_date_sk is not null and inv_item_sk is not null and inv_warehouse_sk is not null) TableScan [TS_0] (rows=37584000 width=15) default@inventory,inventory,Tbl:COMPLETE,Col:NONE,Output:["inv_date_sk","inv_item_sk","inv_warehouse_sk","inv_quantity_on_hand"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_200] + SHUFFLE [RS_212] PartitionCols:_col0 - Select Operator [SEL_198] (rows=18262 width=1119) + Select Operator [SEL_210] (rows=18262 width=1119) Output:["_col0"] - Filter Operator [FIL_196] (rows=18262 width=1119) + Filter Operator [FIL_208] (rows=18262 width=1119) predicate:((d_moy = 5) and (d_year = 1999) and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_213] + SHUFFLE [RS_225] PartitionCols:_col0, _col1 - Select Operator [SEL_212] (rows=12506076 width=15) + Select Operator [SEL_224] (rows=12506076 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_211] (rows=12506076 width=15) + Filter Operator [FIL_223] (rows=12506076 width=15) predicate:CASE WHEN (((_col3 / _col4) = 0)) THEN (false) ELSE (((power(((_col5 - ((_col6 * _col6) / _col4)) / CASE WHEN ((_col4 = 1L)) THEN (null) ELSE ((_col4 - 1)) END), 0.5) / (_col3 / _col4)) > 1.0D)) END - Select Operator [SEL_210] (rows=25012152 width=15) + Select Operator [SEL_222] (rows=25012152 
width=15) Output:["_col0","_col1","_col3","_col4","_col5","_col6"] - Group By Operator [GBY_209] (rows=25012152 width=15) + Group By Operator [GBY_221] (rows=25012152 width=15) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_24] @@ -160,36 +160,36 @@ Stage-0 Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col3)","count(_col3)","sum(_col5)","sum(_col4)"],keys:_col1, _col2, _col0 Select Operator [SEL_21] (rows=50024305 width=15) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_186] (rows=50024305 width=15) - Conds:RS_18._col2=RS_207._col0(Inner),Output:["_col3","_col7","_col8","_col9"] + Merge Join Operator [MERGEJOIN_198] (rows=50024305 width=15) + Conds:RS_18._col2=RS_219._col0(Inner),Output:["_col3","_col7","_col8","_col9"] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_207] + SHUFFLE [RS_219] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_206] + Please refer to the previous Select Operator [SEL_218] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_185] (rows=45476640 width=15) - Conds:RS_15._col1=RS_203._col0(Inner),Output:["_col2","_col3","_col7"] + Merge Join Operator [MERGEJOIN_197] (rows=45476640 width=15) + Conds:RS_15._col1=RS_215._col0(Inner),Output:["_col2","_col3","_col7"] <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_203] + SHUFFLE [RS_215] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_202] + Please refer to the previous Select Operator [SEL_214] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_15] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_184] (rows=41342400 width=15) - Conds:RS_193._col0=RS_199._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_196] (rows=41342400 
width=15) + Conds:RS_205._col0=RS_211._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_193] + SHUFFLE [RS_205] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_192] + Please refer to the previous Select Operator [SEL_204] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_199] + SHUFFLE [RS_211] PartitionCols:_col0 - Select Operator [SEL_197] (rows=18262 width=1119) + Select Operator [SEL_209] (rows=18262 width=1119) Output:["_col0"] - Filter Operator [FIL_195] (rows=18262 width=1119) + Filter Operator [FIL_207] (rows=18262 width=1119) predicate:((d_moy = 4) and (d_year = 1999) and d_date_sk is not null) Please refer to the previous TableScan [TS_3] diff --git a/ql/src/test/results/clientpositive/perf/tez/query40.q.out b/ql/src/test/results/clientpositive/perf/tez/query40.q.out index 2f116f12eb..9920ad3296 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query40.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query40.q.out @@ -71,14 +71,14 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_134] - Limit [LIM_133] (rows=100 width=135) + File Output Operator [FS_135] + Limit [LIM_134] (rows=100 width=135) Number of rows:100 - Select Operator [SEL_132] (rows=210822976 width=135) + Select Operator [SEL_133] (rows=210822976 width=135) Output:["_col0","_col1","_col2","_col3"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_131] - Group By Operator [GBY_130] (rows=210822976 width=135) + SHUFFLE [RS_132] + Group By Operator [GBY_131] (rows=210822976 width=135) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_30] @@ -87,98 +87,98 @@ Stage-0 Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col2)","sum(_col3)"],keys:_col0, _col1 Select Operator [SEL_27] (rows=421645953 width=135) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator 
[MERGEJOIN_99] (rows=421645953 width=135) - Conds:RS_24._col1=RS_118._col0(Inner),Output:["_col4","_col7","_col9","_col11","_col14"] + Merge Join Operator [MERGEJOIN_100] (rows=421645953 width=135) + Conds:RS_24._col1=RS_119._col0(Inner),Output:["_col4","_col7","_col9","_col11","_col14"] <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_118] + SHUFFLE [RS_119] PartitionCols:_col0 - Select Operator [SEL_117] (rows=27 width=1029) + Select Operator [SEL_118] (rows=27 width=1029) Output:["_col0","_col1"] - Filter Operator [FIL_116] (rows=27 width=1029) + Filter Operator [FIL_117] (rows=27 width=1029) predicate:w_warehouse_sk is not null TableScan [TS_12] (rows=27 width=1029) default@warehouse,warehouse,Tbl:COMPLETE,Col:NONE,Output:["w_warehouse_sk","w_state"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_98] (rows=383314495 width=135) - Conds:RS_21._col2=RS_110._col0(Inner),Output:["_col1","_col4","_col7","_col9","_col11"] + Merge Join Operator [MERGEJOIN_99] (rows=383314495 width=135) + Conds:RS_21._col2=RS_111._col0(Inner),Output:["_col1","_col4","_col7","_col9","_col11"] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_110] + SHUFFLE [RS_111] PartitionCols:_col0 - Select Operator [SEL_109] (rows=51333 width=1436) + Select Operator [SEL_110] (rows=51333 width=1436) Output:["_col0","_col1"] - Filter Operator [FIL_108] (rows=51333 width=1436) + Filter Operator [FIL_109] (rows=51333 width=1436) predicate:(i_current_price BETWEEN 0.99 AND 1.49 and i_item_sk is not null) TableScan [TS_9] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_current_price"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_97] (rows=348467716 width=135) - Conds:RS_18._col0=RS_102._col0(Inner),Output:["_col1","_col2","_col4","_col7","_col9"] + Merge Join Operator [MERGEJOIN_98] (rows=348467716 width=135) + 
Conds:RS_18._col0=RS_103._col0(Inner),Output:["_col1","_col2","_col4","_col7","_col9"] <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_102] + SHUFFLE [RS_103] PartitionCols:_col0 - Select Operator [SEL_101] (rows=8116 width=1119) + Select Operator [SEL_102] (rows=8116 width=1119) Output:["_col0","_col1"] - Filter Operator [FIL_100] (rows=8116 width=1119) + Filter Operator [FIL_101] (rows=8116 width=1119) predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00' and d_date_sk is not null) TableScan [TS_6] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_96] (rows=316788826 width=135) - Conds:RS_126._col2, _col3=RS_129._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col4","_col7"] + Merge Join Operator [MERGEJOIN_97] (rows=316788826 width=135) + Conds:RS_127._col2, _col3=RS_130._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col4","_col7"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_126] + SHUFFLE [RS_127] PartitionCols:_col2, _col3 - Select Operator [SEL_125] (rows=287989836 width=135) + Select Operator [SEL_126] (rows=287989836 width=135) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_124] (rows=287989836 width=135) + Filter Operator [FIL_125] (rows=287989836 width=135) predicate:((cs_item_sk BETWEEN DynamicValue(RS_22_item_i_item_sk_min) AND DynamicValue(RS_22_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_22_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_19_date_dim_d_date_sk_min) AND DynamicValue(RS_19_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_19_date_dim_d_date_sk_bloom_filter))) and (cs_warehouse_sk BETWEEN DynamicValue(RS_25_warehouse_w_warehouse_sk_min) AND DynamicValue(RS_25_warehouse_w_warehouse_sk_max) and 
in_bloom_filter(cs_warehouse_sk, DynamicValue(RS_25_warehouse_w_warehouse_sk_bloom_filter))) and cs_item_sk is not null and cs_sold_date_sk is not null and cs_warehouse_sk is not null) TableScan [TS_0] (rows=287989836 width=135) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_warehouse_sk","cs_item_sk","cs_order_number","cs_sales_price"] <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_107] - Group By Operator [GBY_106] (rows=1 width=12) + BROADCAST [RS_108] + Group By Operator [GBY_107] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_105] - Group By Operator [GBY_104] (rows=1 width=12) + SHUFFLE [RS_106] + Group By Operator [GBY_105] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_103] (rows=8116 width=1119) + Select Operator [SEL_104] (rows=8116 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_101] + Please refer to the previous Select Operator [SEL_102] <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_115] - Group By Operator [GBY_114] (rows=1 width=12) + BROADCAST [RS_116] + Group By Operator [GBY_115] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_113] - Group By Operator [GBY_112] (rows=1 width=12) + SHUFFLE [RS_114] + Group By Operator [GBY_113] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_111] (rows=51333 width=1436) + Select Operator [SEL_112] (rows=51333 width=1436) Output:["_col0"] - Please refer to the previous Select 
Operator [SEL_109] + Please refer to the previous Select Operator [SEL_110] <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_123] - Group By Operator [GBY_122] (rows=1 width=12) + BROADCAST [RS_124] + Group By Operator [GBY_123] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_121] - Group By Operator [GBY_120] (rows=1 width=12) + SHUFFLE [RS_122] + Group By Operator [GBY_121] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_119] (rows=27 width=1029) + Select Operator [SEL_120] (rows=27 width=1029) Output:["_col0"] - Please refer to the previous Select Operator [SEL_117] + Please refer to the previous Select Operator [SEL_118] <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_129] + SHUFFLE [RS_130] PartitionCols:_col0, _col1 - Select Operator [SEL_128] (rows=28798881 width=106) + Select Operator [SEL_129] (rows=28798881 width=106) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_127] (rows=28798881 width=106) + Filter Operator [FIL_128] (rows=28798881 width=106) predicate:cr_item_sk is not null TableScan [TS_3] (rows=28798881 width=106) default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_item_sk","cr_order_number","cr_refunded_cash"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query54.q.out b/ql/src/test/results/clientpositive/perf/tez/query54.q.out index 8ab239ce26..3e3c607e0d 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query54.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query54.q.out @@ -1,7 +1,7 @@ -Warning: Shuffle Join MERGEJOIN[269][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product -Warning: Shuffle Join MERGEJOIN[270][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in 
Stage 'Reducer 5' is a cross product -Warning: Shuffle Join MERGEJOIN[268][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 33' is a cross product -Warning: Shuffle Join MERGEJOIN[271][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[271][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[272][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4]] in Stage 'Reducer 5' is a cross product +Warning: Shuffle Join MERGEJOIN[270][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 33' is a cross product +Warning: Shuffle Join MERGEJOIN[273][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 6' is a cross product PREHOOK: query: explain with my_customers as ( select distinct c_customer_sk @@ -148,25 +148,25 @@ Stage-0 limit:100 Stage-1 Reducer 9 vectorized - File Output Operator [FS_358] - Limit [LIM_357] (rows=100 width=158) + File Output Operator [FS_360] + Limit [LIM_359] (rows=100 width=158) Number of rows:100 - Select Operator [SEL_356] (rows=1614130953450400 width=158) + Select Operator [SEL_358] (rows=1614130953450400 width=158) Output:["_col0","_col1","_col2"] <-Reducer 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_355] - Select Operator [SEL_354] (rows=1614130953450400 width=158) + SHUFFLE [RS_357] + Select Operator [SEL_356] (rows=1614130953450400 width=158) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_353] (rows=1614130953450400 width=158) + Group By Operator [GBY_355] (rows=1614130953450400 width=158) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Reducer 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_352] + SHUFFLE [RS_354] PartitionCols:_col0 - Group By Operator [GBY_351] (rows=3228261906900801 width=158) + Group By Operator [GBY_353] (rows=3228261906900801 width=158) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Select Operator [SEL_350] (rows=3228261906900801 width=158) + Select 
Operator [SEL_352] (rows=3228261906900801 width=158) Output:["_col0"] - Group By Operator [GBY_349] (rows=3228261906900801 width=158) + Group By Operator [GBY_351] (rows=3228261906900801 width=158) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_119] @@ -179,257 +179,257 @@ Stage-0 predicate:_col11 BETWEEN _col13 AND _col15 Select Operator [SEL_115] (rows=58108714324214428 width=158) Output:["_col0","_col4","_col11","_col13","_col15"] - Merge Join Operator [MERGEJOIN_271] (rows=58108714324214428 width=158) + Merge Join Operator [MERGEJOIN_273] (rows=58108714324214428 width=158) Conds:(Inner),Output:["_col0","_col2","_col6","_col13","_col15"] <-Reducer 33 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_112] - Merge Join Operator [MERGEJOIN_268] (rows=9131 width=1128) + Merge Join Operator [MERGEJOIN_270] (rows=9131 width=1128) Conds:(Right Outer),Output:["_col0"] <-Reducer 32 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_340] - Group By Operator [GBY_339] (rows=9131 width=1119) + PARTITION_ONLY_SHUFFLE [RS_342] + Group By Operator [GBY_341] (rows=9131 width=1119) Output:["_col0"],keys:KEY._col0 <-Map 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_328] + SHUFFLE [RS_330] PartitionCols:_col0 - Group By Operator [GBY_325] (rows=18262 width=1119) + Group By Operator [GBY_327] (rows=18262 width=1119) Output:["_col0"],keys:_col0 - Select Operator [SEL_322] (rows=18262 width=1119) + Select Operator [SEL_324] (rows=18262 width=1119) Output:["_col0"] - Filter Operator [FIL_320] (rows=18262 width=1119) + Filter Operator [FIL_322] (rows=18262 width=1119) predicate:((d_moy = 3) and (d_year = 1999)) TableScan [TS_73] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_month_seq","d_year","d_moy"] <-Reducer 35 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_348] - Select Operator [SEL_347] (rows=1 width=8) - Filter Operator [FIL_346] (rows=1 width=8) + 
PARTITION_ONLY_SHUFFLE [RS_350] + Select Operator [SEL_349] (rows=1 width=8) + Filter Operator [FIL_348] (rows=1 width=8) predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_345] (rows=1 width=8) + Group By Operator [GBY_347] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] <-Reducer 34 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_344] - Group By Operator [GBY_343] (rows=1 width=8) + PARTITION_ONLY_SHUFFLE [RS_346] + Group By Operator [GBY_345] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_342] (rows=9131 width=1119) - Group By Operator [GBY_341] (rows=9131 width=1119) + Select Operator [SEL_344] (rows=9131 width=1119) + Group By Operator [GBY_343] (rows=9131 width=1119) Output:["_col0"],keys:KEY._col0 <-Map 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_329] + SHUFFLE [RS_331] PartitionCols:_col0 - Group By Operator [GBY_326] (rows=18262 width=1119) + Group By Operator [GBY_328] (rows=18262 width=1119) Output:["_col0"],keys:_col0 - Select Operator [SEL_323] (rows=18262 width=1119) + Select Operator [SEL_325] (rows=18262 width=1119) Output:["_col0"] - Please refer to the previous Filter Operator [FIL_320] + Please refer to the previous Filter Operator [FIL_322] <-Reducer 5 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_113] Select Operator [SEL_108] (rows=6363893803988 width=1217) Output:["_col0","_col4","_col11","_col13"] - Merge Join Operator [MERGEJOIN_270] (rows=6363893803988 width=1217) + Merge Join Operator [MERGEJOIN_272] (rows=6363893803988 width=1217) Conds:(Left Outer),Output:["_col2","_col4","_col10","_col13"] <-Reducer 30 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_332] - Group By Operator [GBY_330] (rows=9131 width=1119) + PARTITION_ONLY_SHUFFLE [RS_334] + Group By Operator [GBY_332] (rows=9131 width=1119) Output:["_col0"],keys:KEY._col0 <-Map 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_327] + SHUFFLE [RS_329] PartitionCols:_col0 - Group By Operator 
[GBY_324] (rows=18262 width=1119) + Group By Operator [GBY_326] (rows=18262 width=1119) Output:["_col0"],keys:_col0 - Select Operator [SEL_321] (rows=18262 width=1119) + Select Operator [SEL_323] (rows=18262 width=1119) Output:["_col0"] - Please refer to the previous Filter Operator [FIL_320] + Please refer to the previous Filter Operator [FIL_322] <-Reducer 4 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_105] - Merge Join Operator [MERGEJOIN_269] (rows=696954748 width=97) + Merge Join Operator [MERGEJOIN_271] (rows=696954748 width=97) Conds:(Inner),Output:["_col2","_col4","_col10"] <-Reducer 3 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_102] - Merge Join Operator [MERGEJOIN_267] (rows=696954748 width=88) + Merge Join Operator [MERGEJOIN_269] (rows=696954748 width=88) Conds:RS_99._col1=RS_100._col5(Inner),Output:["_col2","_col4","_col10"] <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_100] PartitionCols:_col5 - Merge Join Operator [MERGEJOIN_266] (rows=316240138 width=135) - Conds:RS_69._col0=RS_319._col1(Inner),Output:["_col5"] + Merge Join Operator [MERGEJOIN_268] (rows=316240138 width=135) + Conds:RS_69._col0=RS_321._col1(Inner),Output:["_col5"] <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_69] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_262] (rows=44000000 width=1014) - Conds:RS_295._col1, _col2=RS_298._col0, _col1(Inner),Output:["_col0"] + Merge Join Operator [MERGEJOIN_264] (rows=44000000 width=1014) + Conds:RS_297._col1, _col2=RS_300._col0, _col1(Inner),Output:["_col0"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_295] + SHUFFLE [RS_297] PartitionCols:_col1, _col2 - Select Operator [SEL_294] (rows=40000000 width=1014) + Select Operator [SEL_296] (rows=40000000 width=1014) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_293] (rows=40000000 width=1014) + Filter Operator [FIL_295] (rows=40000000 width=1014) predicate:(ca_address_sk is not null and ca_county is not null and ca_state is not null) TableScan [TS_29] (rows=40000000 width=1014) 
default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_county","ca_state"] <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_298] + SHUFFLE [RS_300] PartitionCols:_col0, _col1 - Select Operator [SEL_297] (rows=1704 width=1910) + Select Operator [SEL_299] (rows=1704 width=1910) Output:["_col0","_col1"] - Filter Operator [FIL_296] (rows=1704 width=1910) + Filter Operator [FIL_298] (rows=1704 width=1910) predicate:(s_county is not null and s_state is not null) TableScan [TS_32] (rows=1704 width=1910) default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_county","s_state"] <-Reducer 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_319] + SHUFFLE [RS_321] PartitionCols:_col1 - Select Operator [SEL_318] (rows=287491029 width=135) + Select Operator [SEL_320] (rows=287491029 width=135) Output:["_col0","_col1"] - Group By Operator [GBY_317] (rows=287491029 width=135) + Group By Operator [GBY_319] (rows=287491029 width=135) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Reducer 20 [SIMPLE_EDGE] SHUFFLE [RS_63] PartitionCols:_col0, _col1 Group By Operator [GBY_62] (rows=574982058 width=135) Output:["_col0","_col1"],keys:_col10, _col9 - Merge Join Operator [MERGEJOIN_265] (rows=574982058 width=135) - Conds:RS_58._col1=RS_313._col0(Inner),Output:["_col9","_col10"] + Merge Join Operator [MERGEJOIN_267] (rows=574982058 width=135) + Conds:RS_58._col1=RS_315._col0(Inner),Output:["_col9","_col10"] <-Map 27 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_313] + PARTITION_ONLY_SHUFFLE [RS_315] PartitionCols:_col0 - Select Operator [SEL_312] (rows=80000000 width=860) + Select Operator [SEL_314] (rows=80000000 width=860) Output:["_col0","_col1"] - Filter Operator [FIL_311] (rows=80000000 width=860) + Filter Operator [FIL_313] (rows=80000000 width=860) predicate:(c_current_addr_sk is not null and c_customer_sk is not null) TableScan [TS_49] (rows=80000000 width=860) 
default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_addr_sk"] <-Reducer 19 [SIMPLE_EDGE] SHUFFLE [RS_58] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_264] (rows=522710951 width=135) - Conds:RS_55._col2=RS_307._col0(Inner),Output:["_col1"] + Merge Join Operator [MERGEJOIN_266] (rows=522710951 width=135) + Conds:RS_55._col2=RS_309._col0(Inner),Output:["_col1"] <-Map 25 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_307] + PARTITION_ONLY_SHUFFLE [RS_309] PartitionCols:_col0 - Select Operator [SEL_306] (rows=115500 width=1436) + Select Operator [SEL_308] (rows=115500 width=1436) Output:["_col0"] - Filter Operator [FIL_305] (rows=115500 width=1436) + Filter Operator [FIL_307] (rows=115500 width=1436) predicate:((i_category = 'Jewelry') and (i_class = 'consignment') and i_item_sk is not null) TableScan [TS_46] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_class","i_category"] <-Reducer 18 [SIMPLE_EDGE] SHUFFLE [RS_55] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_263] (rows=475191764 width=135) - Conds:Union 17._col0=RS_301._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_265] (rows=475191764 width=135) + Conds:Union 17._col0=RS_303._col0(Inner),Output:["_col1","_col2"] <-Map 23 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_301] + PARTITION_ONLY_SHUFFLE [RS_303] PartitionCols:_col0 - Select Operator [SEL_300] (rows=18262 width=1119) + Select Operator [SEL_302] (rows=18262 width=1119) Output:["_col0"] - Filter Operator [FIL_299] (rows=18262 width=1119) + Filter Operator [FIL_301] (rows=18262 width=1119) predicate:((d_moy = 3) and (d_year = 1999) and d_date_sk is not null) TableScan [TS_43] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] <-Union 17 [SIMPLE_EDGE] <-Map 16 [CONTAINS] vectorized - Reduce Output Operator [RS_369] + Reduce Output Operator [RS_371] PartitionCols:_col0 - Select 
Operator [SEL_368] (rows=287989836 width=135) + Select Operator [SEL_370] (rows=287989836 width=135) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_367] (rows=287989836 width=135) + Filter Operator [FIL_369] (rows=287989836 width=135) predicate:((cs_bill_customer_sk BETWEEN DynamicValue(RS_59_customer_c_customer_sk_min) AND DynamicValue(RS_59_customer_c_customer_sk_max) and in_bloom_filter(cs_bill_customer_sk, DynamicValue(RS_59_customer_c_customer_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_56_item_i_item_sk_min) AND DynamicValue(RS_56_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_56_item_i_item_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_53_date_dim_d_date_sk_min) AND DynamicValue(RS_53_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_53_date_dim_d_date_sk_bloom_filter))) and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_272] (rows=287989836 width=135) + TableScan [TS_274] (rows=287989836 width=135) Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk"] <-Reducer 24 [BROADCAST_EDGE] vectorized - BROADCAST [RS_360] - Group By Operator [GBY_359] (rows=1 width=12) + BROADCAST [RS_362] + Group By Operator [GBY_361] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 23 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_304] - Group By Operator [GBY_303] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_306] + Group By Operator [GBY_305] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_302] (rows=18262 width=1119) + Select Operator [SEL_304] (rows=18262 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_300] + Please refer to the previous Select 
Operator [SEL_302] <-Reducer 26 [BROADCAST_EDGE] vectorized - BROADCAST [RS_363] - Group By Operator [GBY_362] (rows=1 width=12) + BROADCAST [RS_365] + Group By Operator [GBY_364] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 25 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_310] - Group By Operator [GBY_309] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_312] + Group By Operator [GBY_311] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_308] (rows=115500 width=1436) + Select Operator [SEL_310] (rows=115500 width=1436) Output:["_col0"] - Please refer to the previous Select Operator [SEL_306] + Please refer to the previous Select Operator [SEL_308] <-Reducer 28 [BROADCAST_EDGE] vectorized - BROADCAST [RS_366] - Group By Operator [GBY_365] (rows=1 width=12) + BROADCAST [RS_368] + Group By Operator [GBY_367] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=80000000)"] <-Map 27 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_316] - Group By Operator [GBY_315] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_318] + Group By Operator [GBY_317] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=80000000)"] - Select Operator [SEL_314] (rows=80000000 width=860) + Select Operator [SEL_316] (rows=80000000 width=860) Output:["_col0"] - Please refer to the previous Select Operator [SEL_312] + Please refer to the previous Select Operator [SEL_314] <-Map 22 [CONTAINS] vectorized - Reduce Output Operator [RS_372] + Reduce Output Operator [RS_374] PartitionCols:_col0 - Select Operator [SEL_371] (rows=144002668 width=135) + Select Operator [SEL_373] 
(rows=144002668 width=135) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_370] (rows=144002668 width=135) + Filter Operator [FIL_372] (rows=144002668 width=135) predicate:((ws_item_sk BETWEEN DynamicValue(RS_56_item_i_item_sk_min) AND DynamicValue(RS_56_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_56_item_i_item_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_53_date_dim_d_date_sk_min) AND DynamicValue(RS_53_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_53_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_item_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_277] (rows=144002668 width=135) + TableScan [TS_279] (rows=144002668 width=135) Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk"] <-Reducer 24 [BROADCAST_EDGE] vectorized - BROADCAST [RS_361] - Please refer to the previous Group By Operator [GBY_359] + BROADCAST [RS_363] + Please refer to the previous Group By Operator [GBY_361] <-Reducer 26 [BROADCAST_EDGE] vectorized - BROADCAST [RS_364] - Please refer to the previous Group By Operator [GBY_362] + BROADCAST [RS_366] + Please refer to the previous Group By Operator [GBY_364] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_99] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_261] (rows=633595212 width=88) - Conds:RS_292._col0=RS_284._col0(Inner),Output:["_col1","_col2","_col4"] + Merge Join Operator [MERGEJOIN_263] (rows=633595212 width=88) + Conds:RS_294._col0=RS_286._col0(Inner),Output:["_col1","_col2","_col4"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_284] + SHUFFLE [RS_286] PartitionCols:_col0 - Select Operator [SEL_283] (rows=73049 width=1119) + Select Operator [SEL_285] (rows=73049 width=1119) Output:["_col0","_col1"] - Filter Operator [FIL_282] (rows=73049 width=1119) + Filter Operator [FIL_284] (rows=73049 width=1119) predicate:d_date_sk is not null TableScan [TS_26] (rows=73049 width=1119) 
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_292] + SHUFFLE [RS_294] PartitionCols:_col0 - Select Operator [SEL_291] (rows=575995635 width=88) + Select Operator [SEL_293] (rows=575995635 width=88) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_290] (rows=575995635 width=88) + Filter Operator [FIL_292] (rows=575995635 width=88) predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_97_date_dim_d_date_sk_min) AND DynamicValue(RS_97_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_97_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null) TableScan [TS_23] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk","ss_ext_sales_price"] <-Reducer 11 [BROADCAST_EDGE] vectorized - BROADCAST [RS_289] - Group By Operator [GBY_288] (rows=1 width=12) + BROADCAST [RS_291] + Group By Operator [GBY_290] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_287] - Group By Operator [GBY_286] (rows=1 width=12) + SHUFFLE [RS_289] + Group By Operator [GBY_288] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_285] (rows=73049 width=1119) + Select Operator [SEL_287] (rows=73049 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_283] + Please refer to the previous Select Operator [SEL_285] <-Reducer 31 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_338] - Select Operator [SEL_337] (rows=1 width=8) - Filter Operator [FIL_336] (rows=1 width=8) + PARTITION_ONLY_SHUFFLE [RS_340] + Select Operator [SEL_339] (rows=1 width=8) + Filter Operator [FIL_338] (rows=1 
width=8) predicate:(sq_count_check(_col0) <= 1) - Group By Operator [GBY_335] (rows=1 width=8) + Group By Operator [GBY_337] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] <-Reducer 30 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_334] - Group By Operator [GBY_333] (rows=1 width=8) + PARTITION_ONLY_SHUFFLE [RS_336] + Group By Operator [GBY_335] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_331] (rows=9131 width=1119) - Please refer to the previous Group By Operator [GBY_330] + Select Operator [SEL_333] (rows=9131 width=1119) + Please refer to the previous Group By Operator [GBY_332] diff --git a/ql/src/test/results/clientpositive/perf/tez/query59.q.out b/ql/src/test/results/clientpositive/perf/tez/query59.q.out index 6b2dcc3873..29cf1366d8 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query59.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query59.q.out @@ -109,51 +109,51 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_229] - Limit [LIM_228] (rows=100 width=88) + File Output Operator [FS_235] + Limit [LIM_234] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_227] (rows=421657640 width=88) + Select Operator [SEL_233] (rows=421657640 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_60] Select Operator [SEL_59] (rows=421657640 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] - Merge Join Operator [MERGEJOIN_180] (rows=421657640 width=88) + Merge Join Operator [MERGEJOIN_186] (rows=421657640 width=88) Conds:RS_56._col2, _col1=RS_57._col1, (_col0 - 52)(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col12","_col13","_col14","_col15","_col16","_col17"] <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_57] PartitionCols:_col1, (_col0 - 52) Select Operator [SEL_55] 
(rows=383325119 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Merge Join Operator [MERGEJOIN_179] (rows=383325119 width=88) - Conds:RS_52._col1=RS_216._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col11"] + Merge Join Operator [MERGEJOIN_185] (rows=383325119 width=88) + Conds:RS_52._col1=RS_222._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col11"] <-Map 19 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_216] + PARTITION_ONLY_SHUFFLE [RS_222] PartitionCols:_col0 - Select Operator [SEL_215] (rows=1704 width=1910) + Select Operator [SEL_221] (rows=1704 width=1910) Output:["_col0","_col1"] - Filter Operator [FIL_214] (rows=1704 width=1910) + Filter Operator [FIL_220] (rows=1704 width=1910) predicate:(s_store_id is not null and s_store_sk is not null) TableScan [TS_46] (rows=1704 width=1910) default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_id"] <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_52] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_178] (rows=348477374 width=88) - Conds:RS_226._col0=RS_211._col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_184] (rows=348477374 width=88) + Conds:RS_232._col0=RS_217._col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_211] + SHUFFLE [RS_217] PartitionCols:_col1 - Select Operator [SEL_209] (rows=8116 width=1119) + Select Operator [SEL_215] (rows=8116 width=1119) Output:["_col1"] - Filter Operator [FIL_207] (rows=8116 width=1119) + Filter Operator [FIL_213] (rows=8116 width=1119) predicate:(d_month_seq BETWEEN 1197 AND 1208 and d_week_seq is not null) TableScan [TS_15] (rows=73049 width=1119) default@date_dim,d,Tbl:COMPLETE,Col:NONE,Output:["d_month_seq","d_week_seq"] <-Reducer 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_226] + SHUFFLE [RS_232] 
PartitionCols:_col0 - Group By Operator [GBY_225] (rows=316797606 width=88) + Group By Operator [GBY_231] (rows=316797606 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)"],keys:KEY._col0, KEY._col1 <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_40] @@ -162,81 +162,81 @@ Stage-0 Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col2)","sum(_col3)","sum(_col5)","sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col0, _col1 Select Operator [SEL_37] (rows=633595212 width=88) Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col7","_col8"] - Merge Join Operator [MERGEJOIN_177] (rows=633595212 width=88) - Conds:RS_224._col0=RS_185._col0(Inner),Output:["_col1","_col2","_col4","_col5"] + Merge Join Operator [MERGEJOIN_183] (rows=633595212 width=88) + Conds:RS_230._col0=RS_191._col0(Inner),Output:["_col1","_col2","_col4","_col5"] <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_185] + PARTITION_ONLY_SHUFFLE [RS_191] PartitionCols:_col0 - Select Operator [SEL_182] (rows=73049 width=1119) + Select Operator [SEL_188] (rows=73049 width=1119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_181] (rows=73049 width=1119) + Filter Operator [FIL_187] (rows=73049 width=1119) predicate:(d_date_sk is not null and d_week_seq is not null) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_week_seq","d_day_name"] <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_224] + SHUFFLE [RS_230] PartitionCols:_col0 - Select Operator [SEL_223] (rows=575995635 width=88) + Select Operator [SEL_229] (rows=575995635 width=88) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_222] (rows=575995635 width=88) + Filter Operator [FIL_228] (rows=575995635 width=88) predicate:((ss_sold_date_sk BETWEEN 
DynamicValue(RS_35_date_dim_d_date_sk_min) AND DynamicValue(RS_35_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_35_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_53_store_s_store_sk_min) AND DynamicValue(RS_53_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_53_store_s_store_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null) TableScan [TS_28] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_store_sk","ss_sales_price"] <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_213] - Group By Operator [GBY_212] (rows=1 width=12) + BROADCAST [RS_219] + Group By Operator [GBY_218] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_190] - Group By Operator [GBY_188] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_196] + Group By Operator [GBY_194] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_186] (rows=73049 width=1119) + Select Operator [SEL_192] (rows=73049 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_182] + Please refer to the previous Select Operator [SEL_188] <-Reducer 20 [BROADCAST_EDGE] vectorized - BROADCAST [RS_221] - Group By Operator [GBY_220] (rows=1 width=12) + BROADCAST [RS_227] + Group By Operator [GBY_226] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 19 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_219] - Group By Operator [GBY_218] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_225] + Group By Operator [GBY_224] (rows=1 width=12) 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_217] (rows=1704 width=1910) + Select Operator [SEL_223] (rows=1704 width=1910) Output:["_col0"] - Please refer to the previous Select Operator [SEL_215] + Please refer to the previous Select Operator [SEL_221] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_56] PartitionCols:_col2, _col1 Select Operator [SEL_27] (rows=383325119 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] - Merge Join Operator [MERGEJOIN_176] (rows=383325119 width=88) - Conds:RS_24._col1=RS_195._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col12","_col13"] + Merge Join Operator [MERGEJOIN_182] (rows=383325119 width=88) + Conds:RS_24._col1=RS_201._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col12","_col13"] <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_195] + SHUFFLE [RS_201] PartitionCols:_col0 - Select Operator [SEL_194] (rows=1704 width=1910) + Select Operator [SEL_200] (rows=1704 width=1910) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_193] (rows=1704 width=1910) + Filter Operator [FIL_199] (rows=1704 width=1910) predicate:(s_store_id is not null and s_store_sk is not null) TableScan [TS_18] (rows=1704 width=1910) default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_id","s_store_name"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_175] (rows=348477374 width=88) - Conds:RS_205._col0=RS_210._col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Merge Join Operator [MERGEJOIN_181] (rows=348477374 width=88) + Conds:RS_211._col0=RS_216._col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_210] + SHUFFLE [RS_216] 
PartitionCols:_col1 - Select Operator [SEL_208] (rows=8116 width=1119) + Select Operator [SEL_214] (rows=8116 width=1119) Output:["_col1"] - Filter Operator [FIL_206] (rows=8116 width=1119) + Filter Operator [FIL_212] (rows=8116 width=1119) predicate:(d_month_seq BETWEEN 1185 AND 1196 and d_week_seq is not null) Please refer to the previous TableScan [TS_15] <-Reducer 3 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_205] + SHUFFLE [RS_211] PartitionCols:_col0 - Group By Operator [GBY_204] (rows=316797606 width=88) + Group By Operator [GBY_210] (rows=316797606 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)"],keys:KEY._col0, KEY._col1 <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_12] @@ -245,41 +245,41 @@ Stage-0 Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)","sum(_col5)","sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col0, _col1 Select Operator [SEL_9] (rows=633595212 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Merge Join Operator [MERGEJOIN_174] (rows=633595212 width=88) - Conds:RS_203._col0=RS_183._col0(Inner),Output:["_col1","_col2","_col4","_col5"] + Merge Join Operator [MERGEJOIN_180] (rows=633595212 width=88) + Conds:RS_209._col0=RS_189._col0(Inner),Output:["_col1","_col2","_col4","_col5"] <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_183] + PARTITION_ONLY_SHUFFLE [RS_189] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_182] + Please refer to the previous Select Operator [SEL_188] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_203] + SHUFFLE [RS_209] PartitionCols:_col0 - Select Operator [SEL_202] (rows=575995635 width=88) + Select Operator [SEL_208] (rows=575995635 width=88) Output:["_col0","_col1","_col2"] - Filter 
Operator [FIL_201] (rows=575995635 width=88) + Filter Operator [FIL_207] (rows=575995635 width=88) predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_7_date_dim_d_date_sk_min) AND DynamicValue(RS_7_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_7_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_25_store_s_store_sk_min) AND DynamicValue(RS_25_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_25_store_s_store_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null) TableScan [TS_0] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_store_sk","ss_sales_price"] <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_200] - Group By Operator [GBY_199] (rows=1 width=12) + BROADCAST [RS_206] + Group By Operator [GBY_205] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_198] - Group By Operator [GBY_197] (rows=1 width=12) + SHUFFLE [RS_204] + Group By Operator [GBY_203] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_196] (rows=1704 width=1910) + Select Operator [SEL_202] (rows=1704 width=1910) Output:["_col0"] - Please refer to the previous Select Operator [SEL_194] + Please refer to the previous Select Operator [SEL_200] <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_192] - Group By Operator [GBY_191] (rows=1 width=12) + BROADCAST [RS_198] + Group By Operator [GBY_197] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_189] - Group By Operator [GBY_187] 
(rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_195] + Group By Operator [GBY_193] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_184] (rows=73049 width=1119) + Select Operator [SEL_190] (rows=73049 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_182] + Please refer to the previous Select Operator [SEL_188] diff --git a/ql/src/test/results/clientpositive/perf/tez/query64.q.out b/ql/src/test/results/clientpositive/perf/tez/query64.q.out index a673b9f753..6d3edd3173 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query64.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query64.q.out @@ -237,31 +237,32 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 40 <- Reducer 23 (BROADCAST_EDGE), Reducer 34 (BROADCAST_EDGE), Reducer 38 (BROADCAST_EDGE), Reducer 43 (BROADCAST_EDGE), Reducer 47 (BROADCAST_EDGE), Reducer 52 (BROADCAST_EDGE), Reducer 58 (BROADCAST_EDGE), Reducer 61 (BROADCAST_EDGE), Reducer 62 (BROADCAST_EDGE), Reducer 66 (BROADCAST_EDGE) -Map 49 <- Reducer 55 (BROADCAST_EDGE), Reducer 56 (BROADCAST_EDGE), Reducer 61 (BROADCAST_EDGE) -Map 68 <- Reducer 31 (BROADCAST_EDGE), Reducer 35 (BROADCAST_EDGE), Reducer 39 (BROADCAST_EDGE), Reducer 45 (BROADCAST_EDGE), Reducer 48 (BROADCAST_EDGE), Reducer 53 (BROADCAST_EDGE), Reducer 59 (BROADCAST_EDGE), Reducer 63 (BROADCAST_EDGE), Reducer 64 (BROADCAST_EDGE), Reducer 67 (BROADCAST_EDGE) +Map 40 <- Reducer 23 (BROADCAST_EDGE), Reducer 34 (BROADCAST_EDGE), Reducer 38 (BROADCAST_EDGE), Reducer 43 (BROADCAST_EDGE), Reducer 47 (BROADCAST_EDGE), Reducer 52 (BROADCAST_EDGE), Reducer 62 (BROADCAST_EDGE), Reducer 65 (BROADCAST_EDGE), Reducer 66 (BROADCAST_EDGE), Reducer 70 (BROADCAST_EDGE) +Map 49 <- Reducer 43 (BROADCAST_EDGE), Reducer 54 (BROADCAST_EDGE), Reducer 55 (BROADCAST_EDGE), Reducer 65 (BROADCAST_EDGE) +Map 72 <- Reducer 31 
(BROADCAST_EDGE), Reducer 35 (BROADCAST_EDGE), Reducer 39 (BROADCAST_EDGE), Reducer 43 (BROADCAST_EDGE), Reducer 45 (BROADCAST_EDGE), Reducer 48 (BROADCAST_EDGE), Reducer 58 (BROADCAST_EDGE), Reducer 63 (BROADCAST_EDGE), Reducer 67 (BROADCAST_EDGE), Reducer 68 (BROADCAST_EDGE), Reducer 71 (BROADCAST_EDGE) +Map 73 <- Reducer 45 (BROADCAST_EDGE), Reducer 59 (BROADCAST_EDGE), Reducer 60 (BROADCAST_EDGE), Reducer 67 (BROADCAST_EDGE) Reducer 10 <- Reducer 14 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) Reducer 11 <- Reducer 10 (SIMPLE_EDGE) Reducer 12 <- Reducer 30 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 13 <- Map 65 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 13 <- Map 69 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) Reducer 14 <- Reducer 13 (SIMPLE_EDGE) Reducer 16 <- Map 15 (SIMPLE_EDGE), Reducer 41 (SIMPLE_EDGE) Reducer 17 <- Map 46 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) Reducer 18 <- Reducer 17 (SIMPLE_EDGE), Reducer 33 (SIMPLE_EDGE) Reducer 19 <- Reducer 18 (SIMPLE_EDGE), Reducer 51 (ONE_TO_ONE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) -Reducer 20 <- Map 57 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Reducer 20 <- Map 61 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) Reducer 21 <- Map 37 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) -Reducer 22 <- Map 60 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) +Reducer 22 <- Map 64 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE) Reducer 23 <- Map 15 (CUSTOM_SIMPLE_EDGE) Reducer 24 <- Map 15 (SIMPLE_EDGE), Reducer 44 (SIMPLE_EDGE) Reducer 25 <- Map 46 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) Reducer 26 <- Reducer 25 (SIMPLE_EDGE), Reducer 33 (SIMPLE_EDGE) -Reducer 27 <- Reducer 26 (SIMPLE_EDGE), Reducer 51 (ONE_TO_ONE_EDGE) -Reducer 28 <- Map 57 (SIMPLE_EDGE), Reducer 27 (SIMPLE_EDGE) +Reducer 27 <- Reducer 26 (SIMPLE_EDGE), Reducer 57 (ONE_TO_ONE_EDGE) +Reducer 28 <- Map 61 (SIMPLE_EDGE), Reducer 27 (SIMPLE_EDGE) Reducer 29 <- Map 37 (SIMPLE_EDGE), Reducer 28 (SIMPLE_EDGE) Reducer 3 <- Map 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) 
-Reducer 30 <- Map 60 (SIMPLE_EDGE), Reducer 29 (SIMPLE_EDGE) +Reducer 30 <- Map 64 (SIMPLE_EDGE), Reducer 29 (SIMPLE_EDGE) Reducer 31 <- Map 15 (CUSTOM_SIMPLE_EDGE) Reducer 33 <- Map 32 (SIMPLE_EDGE), Map 36 (SIMPLE_EDGE) Reducer 34 <- Reducer 33 (CUSTOM_SIMPLE_EDGE) @@ -271,28 +272,32 @@ Reducer 39 <- Map 37 (CUSTOM_SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 33 (SIMPLE_EDGE) Reducer 41 <- Map 40 (SIMPLE_EDGE), Map 42 (SIMPLE_EDGE) Reducer 43 <- Map 42 (CUSTOM_SIMPLE_EDGE) -Reducer 44 <- Map 42 (SIMPLE_EDGE), Map 68 (SIMPLE_EDGE) +Reducer 44 <- Map 42 (SIMPLE_EDGE), Map 72 (SIMPLE_EDGE) Reducer 45 <- Map 42 (CUSTOM_SIMPLE_EDGE) Reducer 47 <- Map 46 (CUSTOM_SIMPLE_EDGE) Reducer 48 <- Map 46 (CUSTOM_SIMPLE_EDGE) Reducer 5 <- Map 37 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 50 <- Map 49 (SIMPLE_EDGE), Map 54 (SIMPLE_EDGE) +Reducer 50 <- Map 49 (SIMPLE_EDGE), Map 53 (SIMPLE_EDGE) Reducer 51 <- Reducer 50 (SIMPLE_EDGE) Reducer 52 <- Reducer 51 (CUSTOM_SIMPLE_EDGE) -Reducer 53 <- Reducer 51 (CUSTOM_SIMPLE_EDGE) -Reducer 55 <- Map 54 (CUSTOM_SIMPLE_EDGE) -Reducer 56 <- Map 54 (CUSTOM_SIMPLE_EDGE) -Reducer 58 <- Map 57 (CUSTOM_SIMPLE_EDGE) -Reducer 59 <- Map 57 (CUSTOM_SIMPLE_EDGE) -Reducer 6 <- Map 65 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) -Reducer 61 <- Map 60 (CUSTOM_SIMPLE_EDGE) -Reducer 62 <- Map 60 (CUSTOM_SIMPLE_EDGE) -Reducer 63 <- Map 60 (CUSTOM_SIMPLE_EDGE) -Reducer 64 <- Map 60 (CUSTOM_SIMPLE_EDGE) -Reducer 66 <- Map 65 (CUSTOM_SIMPLE_EDGE) -Reducer 67 <- Map 65 (CUSTOM_SIMPLE_EDGE) +Reducer 54 <- Map 53 (CUSTOM_SIMPLE_EDGE) +Reducer 55 <- Map 53 (CUSTOM_SIMPLE_EDGE) +Reducer 56 <- Map 53 (SIMPLE_EDGE), Map 73 (SIMPLE_EDGE) +Reducer 57 <- Reducer 56 (SIMPLE_EDGE) +Reducer 58 <- Reducer 57 (CUSTOM_SIMPLE_EDGE) +Reducer 59 <- Map 53 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Map 69 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 60 <- Map 53 (CUSTOM_SIMPLE_EDGE) +Reducer 62 <- Map 61 (CUSTOM_SIMPLE_EDGE) +Reducer 63 <- Map 61 (CUSTOM_SIMPLE_EDGE) 
+Reducer 65 <- Map 64 (CUSTOM_SIMPLE_EDGE) +Reducer 66 <- Map 64 (CUSTOM_SIMPLE_EDGE) +Reducer 67 <- Map 64 (CUSTOM_SIMPLE_EDGE) +Reducer 68 <- Map 64 (CUSTOM_SIMPLE_EDGE) Reducer 7 <- Reducer 22 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Map 65 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 70 <- Map 69 (CUSTOM_SIMPLE_EDGE) +Reducer 71 <- Map 69 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Map 69 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Reducer 9 <- Reducer 8 (SIMPLE_EDGE) Stage-0 @@ -300,8 +305,8 @@ Stage-0 limit:-1 Stage-1 Reducer 11 vectorized - File Output Operator [FS_1230] - Select Operator [SEL_1229] (rows=273897192 width=88) + File Output Operator [FS_1283] + Select Operator [SEL_1282] (rows=273897192 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20"] <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_259] @@ -309,14 +314,14 @@ Stage-0 Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18"] Filter Operator [FIL_257] (rows=273897192 width=88) predicate:(_col19 <= _col12) - Merge Join Operator [MERGEJOIN_1055] (rows=821691577 width=88) - Conds:RS_1202._col2, _col1, _col3=RS_1228._col1, _col0, _col2(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col19","_col20","_col21","_col22"] + Merge Join Operator [MERGEJOIN_1087] (rows=821691577 width=88) + Conds:RS_1239._col2, _col1, _col3=RS_1281._col1, _col0, _col2(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col19","_col20","_col21","_col22"] <-Reducer 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1228] + SHUFFLE [RS_1281] PartitionCols:_col1, _col0, _col2 - Select Operator [SEL_1227] 
(rows=746992327 width=88) + Select Operator [SEL_1280] (rows=746992327 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Group By Operator [GBY_1226] (rows=746992327 width=88) + Group By Operator [GBY_1279] (rows=746992327 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8, KEY._col9, KEY._col10, KEY._col11, KEY._col12, KEY._col13 <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_251] @@ -327,102 +332,102 @@ Stage-0 Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col23","_col24","_col25","_col26","_col28","_col29","_col43","_col44","_col45","_col46","_col49"] Filter Operator [FIL_248] (rows=1493984654 width=88) predicate:(_col56 <> _col19) - Merge Join Operator [MERGEJOIN_1054] (rows=1493984654 width=88) - Conds:RS_245._col37=RS_1097._col0(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col23","_col24","_col25","_col26","_col28","_col29","_col43","_col44","_col45","_col46","_col49","_col56"] - <-Map 65 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1097] + Merge Join Operator [MERGEJOIN_1086] (rows=1493984654 width=88) + Conds:RS_245._col37=RS_1129._col0(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col23","_col24","_col25","_col26","_col28","_col29","_col43","_col44","_col45","_col46","_col49","_col56"] + <-Map 69 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1129] PartitionCols:_col0 - Select Operator [SEL_1093] (rows=1861800 width=385) + Select Operator [SEL_1125] (rows=1861800 width=385) Output:["_col0","_col1"] - Filter Operator [FIL_1092] (rows=1861800 width=385) + Filter Operator [FIL_1124] (rows=1861800 width=385) predicate:cd_demo_sk is not null TableScan [TS_97] 
(rows=1861800 width=385) default@customer_demographics,cd1,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status"] <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_245] PartitionCols:_col37 - Merge Join Operator [MERGEJOIN_1053] (rows=1358167838 width=88) + Merge Join Operator [MERGEJOIN_1085] (rows=1358167838 width=88) Conds:RS_242._col0=RS_243._col16(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col23","_col24","_col25","_col26","_col28","_col29","_col37","_col43","_col44","_col45","_col46","_col49"] <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_242] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_1024] (rows=128840811 width=860) - Conds:RS_112._col1=RS_1096._col0(Inner),Output:["_col0","_col7","_col9","_col14","_col15","_col16","_col17","_col19"] - <-Map 65 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1096] + Merge Join Operator [MERGEJOIN_1056] (rows=128840811 width=860) + Conds:RS_112._col1=RS_1128._col0(Inner),Output:["_col0","_col7","_col9","_col14","_col15","_col16","_col17","_col19"] + <-Map 69 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1128] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1093] + Please refer to the previous Select Operator [SEL_1125] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_112] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1023] (rows=117128008 width=860) - Conds:RS_109._col3=RS_1083._col0(Inner),Output:["_col0","_col1","_col7","_col9","_col14","_col15","_col16","_col17"] + Merge Join Operator [MERGEJOIN_1055] (rows=117128008 width=860) + Conds:RS_109._col3=RS_1115._col0(Inner),Output:["_col0","_col1","_col7","_col9","_col14","_col15","_col16","_col17"] <-Map 37 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1083] + SHUFFLE [RS_1115] PartitionCols:_col0 - Select Operator [SEL_1082] (rows=40000000 width=1014) + Select Operator [SEL_1114] (rows=40000000 width=1014) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_1081] (rows=40000000 width=1014) + Filter Operator [FIL_1113] 
(rows=40000000 width=1014) predicate:ca_address_sk is not null TableScan [TS_19] (rows=40000000 width=1014) default@customer_address,ad2,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_street_number","ca_street_name","ca_city","ca_zip"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_109] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_1022] (rows=106480005 width=860) + Merge Join Operator [MERGEJOIN_1054] (rows=106480005 width=860) Conds:RS_106._col2=RS_107._col0(Inner),Output:["_col0","_col1","_col3","_col7","_col9"] <-Reducer 33 [SIMPLE_EDGE] SHUFFLE [RS_107] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_1021] (rows=7920 width=107) - Conds:RS_1077._col1=RS_1080._col0(Inner),Output:["_col0"] + Merge Join Operator [MERGEJOIN_1053] (rows=7920 width=107) + Conds:RS_1109._col1=RS_1112._col0(Inner),Output:["_col0"] <-Map 32 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1077] + SHUFFLE [RS_1109] PartitionCols:_col1 - Select Operator [SEL_1076] (rows=7200 width=107) + Select Operator [SEL_1108] (rows=7200 width=107) Output:["_col0","_col1"] - Filter Operator [FIL_1075] (rows=7200 width=107) + Filter Operator [FIL_1107] (rows=7200 width=107) predicate:(hd_demo_sk is not null and hd_income_band_sk is not null) TableScan [TS_9] (rows=7200 width=107) default@household_demographics,hd2,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_income_band_sk"] <-Map 36 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1080] + SHUFFLE [RS_1112] PartitionCols:_col0 - Select Operator [SEL_1079] (rows=20 width=12) + Select Operator [SEL_1111] (rows=20 width=12) Output:["_col0"] - Filter Operator [FIL_1078] (rows=20 width=12) + Filter Operator [FIL_1110] (rows=20 width=12) predicate:ib_income_band_sk is not null TableScan [TS_12] (rows=20 width=12) default@income_band,ib2,Tbl:COMPLETE,Col:NONE,Output:["ib_income_band_sk"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_106] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_1020] (rows=96800003 width=860) - 
Conds:RS_103._col4=RS_1066._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col7","_col9"] + Merge Join Operator [MERGEJOIN_1052] (rows=96800003 width=860) + Conds:RS_103._col4=RS_1098._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col7","_col9"] <-Map 15 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1066] + PARTITION_ONLY_SHUFFLE [RS_1098] PartitionCols:_col0 - Select Operator [SEL_1062] (rows=73049 width=1119) + Select Operator [SEL_1094] (rows=73049 width=1119) Output:["_col0","_col1"] - Filter Operator [FIL_1059] (rows=73049 width=1119) + Filter Operator [FIL_1091] (rows=73049 width=1119) predicate:d_date_sk is not null TableScan [TS_3] (rows=73049 width=1119) default@date_dim,d2,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_103] PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_1019] (rows=88000001 width=860) - Conds:RS_1058._col5=RS_1065._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col7"] + Merge Join Operator [MERGEJOIN_1051] (rows=88000001 width=860) + Conds:RS_1090._col5=RS_1097._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col7"] <-Map 15 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1065] + PARTITION_ONLY_SHUFFLE [RS_1097] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1062] + Please refer to the previous Select Operator [SEL_1094] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1058] + SHUFFLE [RS_1090] PartitionCols:_col5 - Select Operator [SEL_1057] (rows=80000000 width=860) + Select Operator [SEL_1089] (rows=80000000 width=860) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_1056] (rows=80000000 width=860) + Filter Operator [FIL_1088] (rows=80000000 width=860) predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_current_hdemo_sk is not null and c_customer_sk is not null and c_first_sales_date_sk is not null and c_first_shipto_date_sk is not null) TableScan 
[TS_0] (rows=80000000 width=860) default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_hdemo_sk","c_current_addr_sk","c_first_shipto_date_sk","c_first_sales_date_sk"] @@ -431,291 +436,297 @@ Stage-0 PartitionCols:_col16 Select Operator [SEL_223] (rows=1234698008 width=88) Output:["_col3","_col4","_col5","_col6","_col8","_col9","_col16","_col17","_col23","_col24","_col25","_col26","_col29"] - Merge Join Operator [MERGEJOIN_1052] (rows=1234698008 width=88) - Conds:RS_220._col5, _col12=RS_1149._col0, _col1(Inner),Output:["_col6","_col7","_col13","_col14","_col15","_col16","_col19","_col26","_col27","_col29","_col30","_col31","_col32"] - <-Map 60 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1149] + Merge Join Operator [MERGEJOIN_1084] (rows=1234698008 width=88) + Conds:RS_220._col5, _col12=RS_1190._col0, _col1(Inner),Output:["_col6","_col7","_col13","_col14","_col15","_col16","_col19","_col26","_col27","_col29","_col30","_col31","_col32"] + <-Map 64 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1190] PartitionCols:_col0, _col1 - Select Operator [SEL_1145] (rows=57591150 width=77) + Select Operator [SEL_1186] (rows=57591150 width=77) Output:["_col0","_col1"] - Filter Operator [FIL_1144] (rows=57591150 width=77) + Filter Operator [FIL_1185] (rows=57591150 width=77) predicate:(sr_item_sk is not null and sr_ticket_number is not null) TableScan [TS_75] (rows=57591150 width=77) default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_item_sk","sr_ticket_number"] <-Reducer 29 [SIMPLE_EDGE] SHUFFLE [RS_220] PartitionCols:_col5, _col12 - Merge Join Operator [MERGEJOIN_1051] (rows=1122452711 width=88) - Conds:RS_217._col9=RS_1086._col0(Inner),Output:["_col5","_col6","_col7","_col12","_col13","_col14","_col15","_col16","_col19","_col26","_col27","_col29","_col30","_col31","_col32"] + Merge Join Operator [MERGEJOIN_1083] (rows=1122452711 width=88) + 
Conds:RS_217._col9=RS_1118._col0(Inner),Output:["_col5","_col6","_col7","_col12","_col13","_col14","_col15","_col16","_col19","_col26","_col27","_col29","_col30","_col31","_col32"] <-Map 37 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1086] + SHUFFLE [RS_1118] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1082] + Please refer to the previous Select Operator [SEL_1114] <-Reducer 28 [SIMPLE_EDGE] SHUFFLE [RS_217] PartitionCols:_col9 - Merge Join Operator [MERGEJOIN_1050] (rows=1020411534 width=88) - Conds:RS_214._col10=RS_1183._col0(Inner),Output:["_col5","_col6","_col7","_col9","_col12","_col13","_col14","_col15","_col16","_col19","_col26","_col27"] - <-Map 57 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1183] + Merge Join Operator [MERGEJOIN_1082] (rows=1020411534 width=88) + Conds:RS_214._col10=RS_1220._col0(Inner),Output:["_col5","_col6","_col7","_col9","_col12","_col13","_col14","_col15","_col16","_col19","_col26","_col27"] + <-Map 61 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1220] PartitionCols:_col0 - Select Operator [SEL_1180] (rows=1704 width=1910) + Select Operator [SEL_1217] (rows=1704 width=1910) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1179] (rows=1704 width=1910) + Filter Operator [FIL_1216] (rows=1704 width=1910) predicate:(s_store_name is not null and s_store_sk is not null and s_zip is not null) TableScan [TS_69] (rows=1704 width=1910) default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name","s_zip"] <-Reducer 27 [SIMPLE_EDGE] SHUFFLE [RS_214] PartitionCols:_col10 - Merge Join Operator [MERGEJOIN_1049] (rows=927646829 width=88) - Conds:RS_211._col5=RS_1171._col0(Inner),Output:["_col5","_col6","_col7","_col9","_col10","_col12","_col13","_col14","_col15","_col16","_col19"] - <-Reducer 51 [ONE_TO_ONE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1171] + Merge Join Operator [MERGEJOIN_1081] (rows=927646829 width=88) + 
Conds:RS_211._col5=RS_1262._col0(Inner),Output:["_col5","_col6","_col7","_col9","_col10","_col12","_col13","_col14","_col15","_col16","_col19"] + <-Reducer 57 [ONE_TO_ONE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1262] PartitionCols:_col0 - Select Operator [SEL_1168] (rows=52798137 width=135) + Select Operator [SEL_1261] (rows=52798137 width=135) Output:["_col0"] - Filter Operator [FIL_1167] (rows=52798137 width=135) + Filter Operator [FIL_1260] (rows=52798137 width=135) predicate:(_col1 > (2 * _col2)) - Group By Operator [GBY_1166] (rows=158394413 width=135) + Group By Operator [GBY_1259] (rows=158394413 width=135) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 - <-Reducer 50 [SIMPLE_EDGE] - SHUFFLE [RS_65] + <-Reducer 56 [SIMPLE_EDGE] + SHUFFLE [RS_192] PartitionCols:_col0 - Group By Operator [GBY_64] (rows=316788826 width=135) + Group By Operator [GBY_191] (rows=316788826 width=135) Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","sum(_col2)"],keys:_col0 - Select Operator [SEL_62] (rows=316788826 width=135) + Select Operator [SEL_189] (rows=316788826 width=135) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_1029] (rows=316788826 width=135) - Conds:RS_1165._col0, _col1=RS_1133._col0, _col1(Inner),Output:["_col0","_col2","_col5","_col6","_col7"] - <-Map 54 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1133] + Merge Join Operator [MERGEJOIN_1079] (rows=316788826 width=135) + Conds:RS_1258._col0, _col1=RS_1170._col0, _col1(Inner),Output:["_col0","_col2","_col5","_col6","_col7"] + <-Map 53 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1170] PartitionCols:_col0, _col1 - Select Operator [SEL_1132] (rows=28798881 width=106) + Select Operator [SEL_1166] (rows=28798881 width=106) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_1131] (rows=28798881 width=106) + Filter Operator [FIL_1165] (rows=28798881 width=106) predicate:(cr_item_sk is not null 
and cr_order_number is not null) TableScan [TS_56] (rows=28798881 width=106) default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_item_sk","cr_order_number","cr_refunded_cash","cr_reversed_charge","cr_store_credit"] - <-Map 49 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1165] + <-Map 73 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1258] PartitionCols:_col0, _col1 - Select Operator [SEL_1164] (rows=287989836 width=135) + Select Operator [SEL_1257] (rows=287989836 width=135) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1163] (rows=287989836 width=135) - predicate:((cs_item_sk BETWEEN DynamicValue(RS_60_catalog_returns_cr_item_sk_min) AND DynamicValue(RS_60_catalog_returns_cr_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_60_catalog_returns_cr_item_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_94_store_returns_sr_item_sk_min) AND DynamicValue(RS_94_store_returns_sr_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_94_store_returns_sr_item_sk_bloom_filter))) and (cs_order_number BETWEEN DynamicValue(RS_60_catalog_returns_cr_order_number_min) AND DynamicValue(RS_60_catalog_returns_cr_order_number_max) and in_bloom_filter(cs_order_number, DynamicValue(RS_60_catalog_returns_cr_order_number_bloom_filter))) and cs_item_sk is not null and cs_order_number is not null) - TableScan [TS_53] (rows=287989836 width=135) + Filter Operator [FIL_1256] (rows=287989836 width=135) + predicate:((cs_item_sk BETWEEN DynamicValue(RS_171_item_i_item_sk_min) AND DynamicValue(RS_171_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_171_item_i_item_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_187_catalog_returns_cr_item_sk_min) AND DynamicValue(RS_187_catalog_returns_cr_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_187_catalog_returns_cr_item_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_221_store_returns_sr_item_sk_min) AND DynamicValue(RS_221_store_returns_sr_item_sk_max) and 
in_bloom_filter(cs_item_sk, DynamicValue(RS_221_store_returns_sr_item_sk_bloom_filter))) and (cs_order_number BETWEEN DynamicValue(RS_187_catalog_returns_cr_order_number_min) AND DynamicValue(RS_187_catalog_returns_cr_order_number_max) and in_bloom_filter(cs_order_number, DynamicValue(RS_187_catalog_returns_cr_order_number_bloom_filter))) and cs_item_sk is not null and cs_order_number is not null) + TableScan [TS_180] (rows=287989836 width=135) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_item_sk","cs_order_number","cs_ext_list_price"] - <-Reducer 61 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1162] - Group By Operator [GBY_1160] (rows=1 width=12) + <-Reducer 45 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1242] + Group By Operator [GBY_1240] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 42 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1144] + Group By Operator [GBY_1142] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1140] (rows=2851 width=1436) + Output:["_col0"] + Select Operator [SEL_1136] (rows=2851 width=1436) + Output:["_col0","_col3"] + Filter Operator [FIL_1135] (rows=2851 width=1436) + predicate:((i_color) IN ('maroon', 'burnished', 'dim', 'steel', 'navajo', 'chocolate') and i_current_price BETWEEN 35 AND 45 and i_current_price BETWEEN 36 AND 50 and i_item_sk is not null) + TableScan [TS_34] (rows=462000 width=1436) + default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_current_price","i_color","i_product_name"] + <-Reducer 67 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1255] + Group By Operator [GBY_1253] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] - <-Map 60 
[CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1156] - Group By Operator [GBY_1152] (rows=1 width=12) + <-Map 64 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1199] + Group By Operator [GBY_1195] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] - Select Operator [SEL_1147] (rows=57591150 width=77) + Select Operator [SEL_1191] (rows=57591150 width=77) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1145] - <-Reducer 55 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1141] - Group By Operator [GBY_1140] (rows=1 width=12) + Please refer to the previous Select Operator [SEL_1186] + <-Reducer 59 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1250] + Group By Operator [GBY_1249] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=28798880)"] - <-Map 54 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1138] - Group By Operator [GBY_1136] (rows=1 width=12) + <-Map 53 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1179] + Group By Operator [GBY_1175] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=28798880)"] - Select Operator [SEL_1134] (rows=28798881 width=106) + Select Operator [SEL_1171] (rows=28798881 width=106) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1132] - <-Reducer 56 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1143] - Group By Operator [GBY_1142] (rows=1 width=12) + Please refer to the previous Select Operator [SEL_1166] + <-Reducer 60 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1252] + Group By Operator [GBY_1251] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=28798880)"] - <-Map 54 [CUSTOM_SIMPLE_EDGE] 
vectorized - PARTITION_ONLY_SHUFFLE [RS_1139] - Group By Operator [GBY_1137] (rows=1 width=12) + <-Map 53 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1180] + Group By Operator [GBY_1176] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=28798880)"] - Select Operator [SEL_1135] (rows=28798881 width=106) + Select Operator [SEL_1172] (rows=28798881 width=106) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1132] + Please refer to the previous Select Operator [SEL_1166] <-Reducer 26 [SIMPLE_EDGE] SHUFFLE [RS_211] PartitionCols:_col5 - Merge Join Operator [MERGEJOIN_1048] (rows=843315281 width=88) + Merge Join Operator [MERGEJOIN_1080] (rows=843315281 width=88) Conds:RS_208._col0=RS_209._col5(Inner),Output:["_col5","_col6","_col7","_col9","_col10","_col12","_col13","_col14","_col15","_col16","_col19"] <-Reducer 33 [SIMPLE_EDGE] SHUFFLE [RS_208] PartitionCols:_col0 - Please refer to the previous Merge Join Operator [MERGEJOIN_1021] + Please refer to the previous Merge Join Operator [MERGEJOIN_1053] <-Reducer 25 [SIMPLE_EDGE] SHUFFLE [RS_209] PartitionCols:_col5 Select Operator [SEL_179] (rows=766650239 width=88) Output:["_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col12","_col13","_col16"] - Merge Join Operator [MERGEJOIN_1046] (rows=766650239 width=88) - Conds:RS_176._col7=RS_1121._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col8","_col9","_col10","_col11","_col12","_col15"] + Merge Join Operator [MERGEJOIN_1078] (rows=766650239 width=88) + Conds:RS_176._col7=RS_1155._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col8","_col9","_col10","_col11","_col12","_col15"] <-Map 46 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1121] + PARTITION_ONLY_SHUFFLE [RS_1155] PartitionCols:_col0 - Select Operator [SEL_1118] (rows=2300 width=1179) + Select Operator [SEL_1152] (rows=2300 
width=1179) Output:["_col0"] - Filter Operator [FIL_1117] (rows=2300 width=1179) + Filter Operator [FIL_1151] (rows=2300 width=1179) predicate:p_promo_sk is not null TableScan [TS_40] (rows=2300 width=1179) default@promotion,promotion,Tbl:COMPLETE,Col:NONE,Output:["p_promo_sk"] <-Reducer 24 [SIMPLE_EDGE] SHUFFLE [RS_176] PartitionCols:_col7 - Merge Join Operator [MERGEJOIN_1045] (rows=696954748 width=88) - Conds:RS_173._col0=RS_1069._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15"] + Merge Join Operator [MERGEJOIN_1077] (rows=696954748 width=88) + Conds:RS_173._col0=RS_1101._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15"] <-Map 15 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1069] + PARTITION_ONLY_SHUFFLE [RS_1101] PartitionCols:_col0 - Select Operator [SEL_1064] (rows=36524 width=1119) + Select Operator [SEL_1096] (rows=36524 width=1119) Output:["_col0"] - Filter Operator [FIL_1061] (rows=36524 width=1119) + Filter Operator [FIL_1093] (rows=36524 width=1119) predicate:((d_year = 2001) and d_date_sk is not null) Please refer to the previous TableScan [TS_3] <-Reducer 44 [SIMPLE_EDGE] SHUFFLE [RS_173] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_1044] (rows=633595212 width=88) - Conds:RS_1225._col1=RS_1107._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15"] + Merge Join Operator [MERGEJOIN_1076] (rows=633595212 width=88) + Conds:RS_1278._col1=RS_1139._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15"] <-Map 42 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1107] + PARTITION_ONLY_SHUFFLE [RS_1139] PartitionCols:_col0 - Select Operator [SEL_1104] (rows=2851 width=1436) - Output:["_col0","_col3"] - Filter Operator 
[FIL_1103] (rows=2851 width=1436) - predicate:((i_color) IN ('maroon', 'burnished', 'dim', 'steel', 'navajo', 'chocolate') and i_current_price BETWEEN 35 AND 45 and i_current_price BETWEEN 36 AND 50 and i_item_sk is not null) - TableScan [TS_34] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_current_price","i_color","i_product_name"] - <-Map 68 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1225] + Please refer to the previous Select Operator [SEL_1136] + <-Map 72 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1278] PartitionCols:_col1 - Select Operator [SEL_1224] (rows=575995635 width=88) + Select Operator [SEL_1277] (rows=575995635 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - Filter Operator [FIL_1223] (rows=575995635 width=88) - predicate:((ss_addr_sk BETWEEN DynamicValue(RS_218_ad1_ca_address_sk_min) AND DynamicValue(RS_218_ad1_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, DynamicValue(RS_218_ad1_ca_address_sk_bloom_filter))) and (ss_cdemo_sk BETWEEN DynamicValue(RS_246_cd1_cd_demo_sk_min) AND DynamicValue(RS_246_cd1_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_246_cd1_cd_demo_sk_bloom_filter))) and (ss_hdemo_sk BETWEEN DynamicValue(RS_208_hd1_hd_demo_sk_min) AND DynamicValue(RS_208_hd1_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_208_hd1_hd_demo_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_171_item_i_item_sk_min) AND DynamicValue(RS_171_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_171_item_i_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_212_catalog_sales_cs_item_sk_min) AND DynamicValue(RS_212_catalog_sales_cs_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_212_catalog_sales_cs_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_221_store_returns_sr_item_sk_min) AND DynamicValue(RS_221_store_returns_sr_item_sk_max) and 
in_bloom_filter(ss_item_sk, DynamicValue(RS_221_store_returns_sr_item_sk_bloom_filter))) and (ss_promo_sk BETWEEN DynamicValue(RS_177_promotion_p_promo_sk_min) AND DynamicValue(RS_177_promotion_p_promo_sk_max) and in_bloom_filter(ss_promo_sk, DynamicValue(RS_177_promotion_p_promo_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_174_d1_d_date_sk_min) AND DynamicValue(RS_174_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_174_d1_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_215_store_s_store_sk_min) AND DynamicValue(RS_215_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_215_store_s_store_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_221_store_returns_sr_ticket_number_min) AND DynamicValue(RS_221_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_221_store_returns_sr_ticket_number_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_item_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) + Filter Operator [FIL_1276] (rows=575995635 width=88) + predicate:((ss_addr_sk BETWEEN DynamicValue(RS_218_ad1_ca_address_sk_min) AND DynamicValue(RS_218_ad1_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, DynamicValue(RS_218_ad1_ca_address_sk_bloom_filter))) and (ss_cdemo_sk BETWEEN DynamicValue(RS_246_cd1_cd_demo_sk_min) AND DynamicValue(RS_246_cd1_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_246_cd1_cd_demo_sk_bloom_filter))) and (ss_hdemo_sk BETWEEN DynamicValue(RS_208_hd1_hd_demo_sk_min) AND DynamicValue(RS_208_hd1_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_208_hd1_hd_demo_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_171_item_i_item_sk_min) AND DynamicValue(RS_171_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, 
DynamicValue(RS_171_item_i_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_212_catalog_sales_cs_item_sk_min) AND DynamicValue(RS_212_catalog_sales_cs_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_212_catalog_sales_cs_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_221_store_returns_sr_item_sk_min) AND DynamicValue(RS_221_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_221_store_returns_sr_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_44_item_i_item_sk_min) AND DynamicValue(RS_44_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_44_item_i_item_sk_bloom_filter))) and (ss_promo_sk BETWEEN DynamicValue(RS_177_promotion_p_promo_sk_min) AND DynamicValue(RS_177_promotion_p_promo_sk_max) and in_bloom_filter(ss_promo_sk, DynamicValue(RS_177_promotion_p_promo_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_174_d1_d_date_sk_min) AND DynamicValue(RS_174_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_174_d1_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_215_store_s_store_sk_min) AND DynamicValue(RS_215_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_215_store_s_store_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_221_store_returns_sr_ticket_number_min) AND DynamicValue(RS_221_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_221_store_returns_sr_ticket_number_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_customer_sk is not null and ss_hdemo_sk is not null and ss_item_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) TableScan [TS_158] (rows=575995635 width=88) 
default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_promo_sk","ss_ticket_number","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] + <-Reducer 43 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1148] + Group By Operator [GBY_1145] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 42 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1143] + Group By Operator [GBY_1141] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_1138] (rows=2851 width=1436) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1136] + <-Reducer 45 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1241] + Please refer to the previous Group By Operator [GBY_1240] + <-Reducer 67 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1254] + Please refer to the previous Group By Operator [GBY_1253] <-Reducer 31 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1206] - Group By Operator [GBY_1205] (rows=1 width=12) + BROADCAST [RS_1244] + Group By Operator [GBY_1243] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1074] - Group By Operator [GBY_1072] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_1106] + Group By Operator [GBY_1104] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1070] (rows=36524 width=1119) + Select Operator [SEL_1102] (rows=36524 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1064] + Please refer to the previous 
Select Operator [SEL_1096] <-Reducer 35 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1210] - Group By Operator [GBY_1209] (rows=1 width=12) + BROADCAST [RS_1248] + Group By Operator [GBY_1247] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Reducer 33 [CUSTOM_SIMPLE_EDGE] - SHUFFLE [RS_895] - Group By Operator [GBY_894] (rows=1 width=12) + SHUFFLE [RS_909] + Group By Operator [GBY_908] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_893] (rows=7920 width=107) + Select Operator [SEL_907] (rows=7920 width=107) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_1021] + Please refer to the previous Merge Join Operator [MERGEJOIN_1053] <-Reducer 39 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1218] - Group By Operator [GBY_1217] (rows=1 width=12) + BROADCAST [RS_1271] + Group By Operator [GBY_1270] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=40000000)"] <-Map 37 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1091] - Group By Operator [GBY_1089] (rows=1 width=12) + SHUFFLE [RS_1123] + Group By Operator [GBY_1121] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=40000000)"] - Select Operator [SEL_1087] (rows=40000000 width=1014) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1082] - <-Reducer 45 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1204] - Group By Operator [GBY_1203] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 42 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1112] - Group By 
Operator [GBY_1110] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1108] (rows=2851 width=1436) + Select Operator [SEL_1119] (rows=40000000 width=1014) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1104] + Please refer to the previous Select Operator [SEL_1114] <-Reducer 48 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1208] - Group By Operator [GBY_1207] (rows=1 width=12) + BROADCAST [RS_1246] + Group By Operator [GBY_1245] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 46 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1126] - Group By Operator [GBY_1124] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_1160] + Group By Operator [GBY_1158] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1122] (rows=2300 width=1179) + Select Operator [SEL_1156] (rows=2300 width=1179) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1118] - <-Reducer 53 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1212] - Group By Operator [GBY_1211] (rows=1 width=228) + Please refer to the previous Select Operator [SEL_1152] + <-Reducer 58 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1267] + Group By Operator [GBY_1266] (rows=1 width=228) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=52798136)"] - <-Reducer 51 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1176] - Group By Operator [GBY_1174] (rows=1 width=228) + <-Reducer 57 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1265] + Group By Operator [GBY_1264] (rows=1 width=228) 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=52798136)"] - Select Operator [SEL_1172] (rows=52798137 width=135) + Select Operator [SEL_1263] (rows=52798137 width=135) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1168] - <-Reducer 59 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1216] - Group By Operator [GBY_1215] (rows=1 width=12) + Please refer to the previous Select Operator [SEL_1261] + <-Reducer 63 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1269] + Group By Operator [GBY_1268] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 57 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1188] - Group By Operator [GBY_1186] (rows=1 width=12) + <-Map 61 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1225] + Group By Operator [GBY_1223] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1184] (rows=1704 width=1910) + Select Operator [SEL_1221] (rows=1704 width=1910) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1180] - <-Reducer 63 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1214] - Group By Operator [GBY_1213] (rows=1 width=12) + Please refer to the previous Select Operator [SEL_1217] + <-Reducer 68 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1273] + Group By Operator [GBY_1272] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] - <-Map 60 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1158] - Group By Operator [GBY_1154] (rows=1 width=12) + <-Map 64 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1200] + Group By Operator [GBY_1196] (rows=1 width=12) 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] - Select Operator [SEL_1150] (rows=57591150 width=77) + Select Operator [SEL_1192] (rows=57591150 width=77) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1145] - <-Reducer 64 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1220] - Group By Operator [GBY_1219] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] - <-Map 60 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1159] - Group By Operator [GBY_1155] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] - Select Operator [SEL_1151] (rows=57591150 width=77) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1145] - <-Reducer 67 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1222] - Group By Operator [GBY_1221] (rows=1 width=12) + Please refer to the previous Select Operator [SEL_1186] + <-Reducer 71 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1275] + Group By Operator [GBY_1274] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1861800)"] - <-Map 65 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1102] - Group By Operator [GBY_1100] (rows=1 width=12) + <-Map 69 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_1134] + Group By Operator [GBY_1132] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1861800)"] - Select Operator [SEL_1098] (rows=1861800 width=385) + Select Operator [SEL_1130] (rows=1861800 width=385) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1093] + Please refer to the previous Select Operator [SEL_1125] <-Reducer 9 [SIMPLE_EDGE] vectorized - 
SHUFFLE [RS_1202] + SHUFFLE [RS_1239] PartitionCols:_col2, _col1, _col3 - Select Operator [SEL_1201] (rows=746992327 width=88) + Select Operator [SEL_1238] (rows=746992327 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] - Group By Operator [GBY_1200] (rows=746992327 width=88) + Group By Operator [GBY_1237] (rows=746992327 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8, KEY._col9, KEY._col10, KEY._col11, KEY._col12, KEY._col13 <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_124] @@ -726,127 +737,193 @@ Stage-0 Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col23","_col24","_col25","_col26","_col28","_col29","_col43","_col44","_col45","_col46","_col49"] Filter Operator [FIL_121] (rows=1493984654 width=88) predicate:(_col56 <> _col19) - Merge Join Operator [MERGEJOIN_1036] (rows=1493984654 width=88) - Conds:RS_118._col37=RS_1094._col0(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col23","_col24","_col25","_col26","_col28","_col29","_col43","_col44","_col45","_col46","_col49","_col56"] - <-Map 65 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1094] + Merge Join Operator [MERGEJOIN_1068] (rows=1493984654 width=88) + Conds:RS_118._col37=RS_1126._col0(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col23","_col24","_col25","_col26","_col28","_col29","_col43","_col44","_col45","_col46","_col49","_col56"] + <-Map 69 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1126] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1093] + Please refer to the previous Select Operator [SEL_1125] 
<-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_118] PartitionCols:_col37 - Merge Join Operator [MERGEJOIN_1035] (rows=1358167838 width=88) + Merge Join Operator [MERGEJOIN_1067] (rows=1358167838 width=88) Conds:RS_115._col0=RS_116._col16(Inner),Output:["_col7","_col9","_col14","_col15","_col16","_col17","_col19","_col23","_col24","_col25","_col26","_col28","_col29","_col37","_col43","_col44","_col45","_col46","_col49"] <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_115] PartitionCols:_col0 - Please refer to the previous Merge Join Operator [MERGEJOIN_1024] + Please refer to the previous Merge Join Operator [MERGEJOIN_1056] <-Reducer 22 [SIMPLE_EDGE] SHUFFLE [RS_116] PartitionCols:_col16 Select Operator [SEL_96] (rows=1234698008 width=88) Output:["_col3","_col4","_col5","_col6","_col8","_col9","_col16","_col17","_col23","_col24","_col25","_col26","_col29"] - Merge Join Operator [MERGEJOIN_1034] (rows=1234698008 width=88) - Conds:RS_93._col5, _col12=RS_1146._col0, _col1(Inner),Output:["_col6","_col7","_col13","_col14","_col15","_col16","_col19","_col26","_col27","_col29","_col30","_col31","_col32"] - <-Map 60 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1146] + Merge Join Operator [MERGEJOIN_1066] (rows=1234698008 width=88) + Conds:RS_93._col5, _col12=RS_1187._col0, _col1(Inner),Output:["_col6","_col7","_col13","_col14","_col15","_col16","_col19","_col26","_col27","_col29","_col30","_col31","_col32"] + <-Map 64 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1187] PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_1145] + Please refer to the previous Select Operator [SEL_1186] <-Reducer 21 [SIMPLE_EDGE] SHUFFLE [RS_93] PartitionCols:_col5, _col12 - Merge Join Operator [MERGEJOIN_1033] (rows=1122452711 width=88) - Conds:RS_90._col9=RS_1084._col0(Inner),Output:["_col5","_col6","_col7","_col12","_col13","_col14","_col15","_col16","_col19","_col26","_col27","_col29","_col30","_col31","_col32"] + Merge Join Operator [MERGEJOIN_1065] 
(rows=1122452711 width=88) + Conds:RS_90._col9=RS_1116._col0(Inner),Output:["_col5","_col6","_col7","_col12","_col13","_col14","_col15","_col16","_col19","_col26","_col27","_col29","_col30","_col31","_col32"] <-Map 37 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1084] + SHUFFLE [RS_1116] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1082] + Please refer to the previous Select Operator [SEL_1114] <-Reducer 20 [SIMPLE_EDGE] SHUFFLE [RS_90] PartitionCols:_col9 - Merge Join Operator [MERGEJOIN_1032] (rows=1020411534 width=88) - Conds:RS_87._col10=RS_1181._col0(Inner),Output:["_col5","_col6","_col7","_col9","_col12","_col13","_col14","_col15","_col16","_col19","_col26","_col27"] - <-Map 57 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1181] + Merge Join Operator [MERGEJOIN_1064] (rows=1020411534 width=88) + Conds:RS_87._col10=RS_1218._col0(Inner),Output:["_col5","_col6","_col7","_col9","_col12","_col13","_col14","_col15","_col16","_col19","_col26","_col27"] + <-Map 61 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1218] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1180] + Please refer to the previous Select Operator [SEL_1217] <-Reducer 19 [SIMPLE_EDGE] SHUFFLE [RS_87] PartitionCols:_col10 - Merge Join Operator [MERGEJOIN_1031] (rows=927646829 width=88) - Conds:RS_84._col5=RS_1169._col0(Inner),Output:["_col5","_col6","_col7","_col9","_col10","_col12","_col13","_col14","_col15","_col16","_col19"] + Merge Join Operator [MERGEJOIN_1063] (rows=927646829 width=88) + Conds:RS_84._col5=RS_1210._col0(Inner),Output:["_col5","_col6","_col7","_col9","_col10","_col12","_col13","_col14","_col15","_col16","_col19"] <-Reducer 51 [ONE_TO_ONE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1169] + PARTITION_ONLY_SHUFFLE [RS_1210] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1168] + Select Operator [SEL_1209] (rows=52798137 width=135) + Output:["_col0"] + Filter Operator [FIL_1208] (rows=52798137 
width=135) + predicate:(_col1 > (2 * _col2)) + Group By Operator [GBY_1207] (rows=158394413 width=135) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 + <-Reducer 50 [SIMPLE_EDGE] + SHUFFLE [RS_65] + PartitionCols:_col0 + Group By Operator [GBY_64] (rows=316788826 width=135) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","sum(_col2)"],keys:_col0 + Select Operator [SEL_62] (rows=316788826 width=135) + Output:["_col0","_col1","_col2"] + Merge Join Operator [MERGEJOIN_1061] (rows=316788826 width=135) + Conds:RS_1206._col0, _col1=RS_1167._col0, _col1(Inner),Output:["_col0","_col2","_col5","_col6","_col7"] + <-Map 53 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1167] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_1166] + <-Map 49 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_1206] + PartitionCols:_col0, _col1 + Select Operator [SEL_1205] (rows=287989836 width=135) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_1204] (rows=287989836 width=135) + predicate:((cs_item_sk BETWEEN DynamicValue(RS_44_item_i_item_sk_min) AND DynamicValue(RS_44_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_44_item_i_item_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_60_catalog_returns_cr_item_sk_min) AND DynamicValue(RS_60_catalog_returns_cr_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_60_catalog_returns_cr_item_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_94_store_returns_sr_item_sk_min) AND DynamicValue(RS_94_store_returns_sr_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_94_store_returns_sr_item_sk_bloom_filter))) and (cs_order_number BETWEEN DynamicValue(RS_60_catalog_returns_cr_order_number_min) AND DynamicValue(RS_60_catalog_returns_cr_order_number_max) and in_bloom_filter(cs_order_number, DynamicValue(RS_60_catalog_returns_cr_order_number_bloom_filter))) and cs_item_sk is not null and 
cs_order_number is not null) + TableScan [TS_53] (rows=287989836 width=135) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_item_sk","cs_order_number","cs_ext_list_price"] + <-Reducer 43 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1147] + Please refer to the previous Group By Operator [GBY_1145] + <-Reducer 65 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1203] + Group By Operator [GBY_1201] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] + <-Map 64 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1197] + Group By Operator [GBY_1193] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] + Select Operator [SEL_1188] (rows=57591150 width=77) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1186] + <-Reducer 54 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1182] + Group By Operator [GBY_1181] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=28798880)"] + <-Map 53 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1177] + Group By Operator [GBY_1173] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=28798880)"] + Select Operator [SEL_1168] (rows=28798881 width=106) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1166] + <-Reducer 55 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1184] + Group By Operator [GBY_1183] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=28798880)"] + <-Map 53 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1178] + Group By Operator [GBY_1174] (rows=1 width=12) + 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=28798880)"] + Select Operator [SEL_1169] (rows=28798881 width=106) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_1166] <-Reducer 18 [SIMPLE_EDGE] SHUFFLE [RS_84] PartitionCols:_col5 - Merge Join Operator [MERGEJOIN_1030] (rows=843315281 width=88) + Merge Join Operator [MERGEJOIN_1062] (rows=843315281 width=88) Conds:RS_81._col0=RS_82._col5(Inner),Output:["_col5","_col6","_col7","_col9","_col10","_col12","_col13","_col14","_col15","_col16","_col19"] <-Reducer 33 [SIMPLE_EDGE] SHUFFLE [RS_81] PartitionCols:_col0 - Please refer to the previous Merge Join Operator [MERGEJOIN_1021] + Please refer to the previous Merge Join Operator [MERGEJOIN_1053] <-Reducer 17 [SIMPLE_EDGE] SHUFFLE [RS_82] PartitionCols:_col5 Select Operator [SEL_52] (rows=766650239 width=88) Output:["_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col12","_col13","_col16"] - Merge Join Operator [MERGEJOIN_1028] (rows=766650239 width=88) - Conds:RS_49._col7=RS_1119._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col8","_col9","_col10","_col11","_col12","_col15"] + Merge Join Operator [MERGEJOIN_1060] (rows=766650239 width=88) + Conds:RS_49._col7=RS_1153._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col8","_col9","_col10","_col11","_col12","_col15"] <-Map 46 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1119] + PARTITION_ONLY_SHUFFLE [RS_1153] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1118] + Please refer to the previous Select Operator [SEL_1152] <-Reducer 16 [SIMPLE_EDGE] SHUFFLE [RS_49] PartitionCols:_col7 - Merge Join Operator [MERGEJOIN_1027] (rows=696954748 width=88) - Conds:RS_46._col0=RS_1067._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15"] + Merge Join Operator 
[MERGEJOIN_1059] (rows=696954748 width=88) + Conds:RS_46._col0=RS_1099._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15"] <-Map 15 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1067] + PARTITION_ONLY_SHUFFLE [RS_1099] PartitionCols:_col0 - Select Operator [SEL_1063] (rows=36524 width=1119) + Select Operator [SEL_1095] (rows=36524 width=1119) Output:["_col0"] - Filter Operator [FIL_1060] (rows=36524 width=1119) + Filter Operator [FIL_1092] (rows=36524 width=1119) predicate:((d_year = 2000) and d_date_sk is not null) Please refer to the previous TableScan [TS_3] <-Reducer 41 [SIMPLE_EDGE] SHUFFLE [RS_46] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_1026] (rows=633595212 width=88) - Conds:RS_1199._col1=RS_1105._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15"] + Merge Join Operator [MERGEJOIN_1058] (rows=633595212 width=88) + Conds:RS_1236._col1=RS_1137._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col15"] <-Map 42 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1105] + PARTITION_ONLY_SHUFFLE [RS_1137] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1104] + Please refer to the previous Select Operator [SEL_1136] <-Map 40 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1199] + SHUFFLE [RS_1236] PartitionCols:_col1 - Select Operator [SEL_1198] (rows=575995635 width=88) + Select Operator [SEL_1235] (rows=575995635 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - Filter Operator [FIL_1197] (rows=575995635 width=88) + Filter Operator [FIL_1234] (rows=575995635 width=88) predicate:((ss_addr_sk BETWEEN DynamicValue(RS_91_ad1_ca_address_sk_min) AND DynamicValue(RS_91_ad1_ca_address_sk_max) and in_bloom_filter(ss_addr_sk, 
DynamicValue(RS_91_ad1_ca_address_sk_bloom_filter))) and (ss_cdemo_sk BETWEEN DynamicValue(RS_119_cd1_cd_demo_sk_min) AND DynamicValue(RS_119_cd1_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_119_cd1_cd_demo_sk_bloom_filter))) and (ss_hdemo_sk BETWEEN DynamicValue(RS_81_hd1_hd_demo_sk_min) AND DynamicValue(RS_81_hd1_hd_demo_sk_max) and in_bloom_filter(ss_hdemo_sk, DynamicValue(RS_81_hd1_hd_demo_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_44_item_i_item_sk_min) AND DynamicValue(RS_44_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_44_item_i_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_85_catalog_sales_cs_item_sk_min) AND DynamicValue(RS_85_catalog_sales_cs_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_85_catalog_sales_cs_item_sk_bloom_filter))) and (ss_item_sk BETWEEN DynamicValue(RS_94_store_returns_sr_item_sk_min) AND DynamicValue(RS_94_store_returns_sr_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_94_store_returns_sr_item_sk_bloom_filter))) and (ss_promo_sk BETWEEN DynamicValue(RS_50_promotion_p_promo_sk_min) AND DynamicValue(RS_50_promotion_p_promo_sk_max) and in_bloom_filter(ss_promo_sk, DynamicValue(RS_50_promotion_p_promo_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_47_d1_d_date_sk_min) AND DynamicValue(RS_47_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_47_d1_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_88_store_s_store_sk_min) AND DynamicValue(RS_88_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_88_store_s_store_sk_bloom_filter))) and (ss_ticket_number BETWEEN DynamicValue(RS_94_store_returns_sr_ticket_number_min) AND DynamicValue(RS_94_store_returns_sr_ticket_number_max) and in_bloom_filter(ss_ticket_number, DynamicValue(RS_94_store_returns_sr_ticket_number_bloom_filter))) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_customer_sk is not null and 
ss_hdemo_sk is not null and ss_item_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) TableScan [TS_31] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_promo_sk","ss_ticket_number","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] - <-Reducer 61 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1161] - Please refer to the previous Group By Operator [GBY_1160] + <-Reducer 43 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1146] + Please refer to the previous Group By Operator [GBY_1145] + <-Reducer 65 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1202] + Please refer to the previous Group By Operator [GBY_1201] <-Reducer 23 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1116] - Group By Operator [GBY_1115] (rows=1 width=12) + BROADCAST [RS_1150] + Group By Operator [GBY_1149] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 15 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1073] - Group By Operator [GBY_1071] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_1105] + Group By Operator [GBY_1103] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1068] (rows=36524 width=1119) + Select Operator [SEL_1100] (rows=36524 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1063] + Please refer to the previous Select Operator [SEL_1095] <-Reducer 34 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1130] - Group By Operator [GBY_1129] (rows=1 width=12) + BROADCAST [RS_1164] + Group By Operator [GBY_1163] (rows=1 width=12) 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Reducer 33 [CUSTOM_SIMPLE_EDGE] SHUFFLE [RS_674] @@ -854,82 +931,71 @@ Stage-0 Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] Select Operator [SEL_672] (rows=7920 width=107) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_1021] + Please refer to the previous Merge Join Operator [MERGEJOIN_1053] <-Reducer 38 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1192] - Group By Operator [GBY_1191] (rows=1 width=12) + BROADCAST [RS_1229] + Group By Operator [GBY_1228] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=40000000)"] <-Map 37 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1090] - Group By Operator [GBY_1088] (rows=1 width=12) + SHUFFLE [RS_1122] + Group By Operator [GBY_1120] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=40000000)"] - Select Operator [SEL_1085] (rows=40000000 width=1014) + Select Operator [SEL_1117] (rows=40000000 width=1014) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1082] - <-Reducer 43 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1114] - Group By Operator [GBY_1113] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 42 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1111] - Group By Operator [GBY_1109] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1106] (rows=2851 width=1436) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_1104] + Please 
refer to the previous Select Operator [SEL_1114] <-Reducer 47 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1128] - Group By Operator [GBY_1127] (rows=1 width=12) + BROADCAST [RS_1162] + Group By Operator [GBY_1161] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 46 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1125] - Group By Operator [GBY_1123] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_1159] + Group By Operator [GBY_1157] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1120] (rows=2300 width=1179) + Select Operator [SEL_1154] (rows=2300 width=1179) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1118] + Please refer to the previous Select Operator [SEL_1152] <-Reducer 52 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1178] - Group By Operator [GBY_1177] (rows=1 width=228) + BROADCAST [RS_1215] + Group By Operator [GBY_1214] (rows=1 width=228) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=52798136)"] <-Reducer 51 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1175] - Group By Operator [GBY_1173] (rows=1 width=228) + PARTITION_ONLY_SHUFFLE [RS_1213] + Group By Operator [GBY_1212] (rows=1 width=228) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=52798136)"] - Select Operator [SEL_1170] (rows=52798137 width=135) + Select Operator [SEL_1211] (rows=52798137 width=135) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1168] - <-Reducer 58 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1190] - Group By Operator [GBY_1189] (rows=1 width=12) + Please refer to the previous Select Operator [SEL_1209] + <-Reducer 62 [BROADCAST_EDGE] vectorized 
+ BROADCAST [RS_1227] + Group By Operator [GBY_1226] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 57 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1187] - Group By Operator [GBY_1185] (rows=1 width=12) + <-Map 61 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1224] + Group By Operator [GBY_1222] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1182] (rows=1704 width=1910) + Select Operator [SEL_1219] (rows=1704 width=1910) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1180] - <-Reducer 62 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1194] - Group By Operator [GBY_1193] (rows=1 width=12) + Please refer to the previous Select Operator [SEL_1217] + <-Reducer 66 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1231] + Group By Operator [GBY_1230] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=57591152)"] - <-Map 60 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1157] - Group By Operator [GBY_1153] (rows=1 width=12) + <-Map 64 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_1198] + Group By Operator [GBY_1194] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=57591152)"] - Select Operator [SEL_1148] (rows=57591150 width=77) + Select Operator [SEL_1189] (rows=57591150 width=77) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1145] - <-Reducer 66 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1196] - Group By Operator [GBY_1195] (rows=1 width=12) + Please refer to the previous Select Operator [SEL_1186] + <-Reducer 70 [BROADCAST_EDGE] vectorized + BROADCAST [RS_1233] + Group By 
Operator [GBY_1232] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1861800)"] - <-Map 65 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1101] - Group By Operator [GBY_1099] (rows=1 width=12) + <-Map 69 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_1133] + Group By Operator [GBY_1131] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1861800)"] - Select Operator [SEL_1095] (rows=1861800 width=385) + Select Operator [SEL_1127] (rows=1861800 width=385) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1093] + Please refer to the previous Select Operator [SEL_1125] diff --git a/ql/src/test/results/clientpositive/perf/tez/query69.q.out b/ql/src/test/results/clientpositive/perf/tez/query69.q.out index a9c7ac3b21..aad5b8188a 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query69.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query69.q.out @@ -117,16 +117,16 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_230] - Limit [LIM_229] (rows=100 width=88) + File Output Operator [FS_232] + Limit [LIM_231] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_228] (rows=191662559 width=88) + Select Operator [SEL_230] (rows=191662559 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_227] - Select Operator [SEL_226] (rows=191662559 width=88) + SHUFFLE [RS_229] + Select Operator [SEL_228] (rows=191662559 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col6"] - Group By Operator [GBY_225] (rows=191662559 width=88) + Group By Operator [GBY_227] (rows=191662559 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 <-Reducer 5 [SIMPLE_EDGE] 
SHUFFLE [RS_67] @@ -137,51 +137,51 @@ Stage-0 Output:["_col6","_col7","_col8","_col9","_col10"] Filter Operator [FIL_64] (rows=383325119 width=88) predicate:_col14 is null - Merge Join Operator [MERGEJOIN_178] (rows=766650239 width=88) - Conds:RS_61._col0=RS_224._col0(Left Outer),Output:["_col6","_col7","_col8","_col9","_col10","_col14"] + Merge Join Operator [MERGEJOIN_180] (rows=766650239 width=88) + Conds:RS_61._col0=RS_226._col0(Left Outer),Output:["_col6","_col7","_col8","_col9","_col10","_col14"] <-Reducer 19 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_224] + FORWARD [RS_226] PartitionCols:_col0 - Select Operator [SEL_223] (rows=158394413 width=135) + Select Operator [SEL_225] (rows=158394413 width=135) Output:["_col0","_col1"] - Group By Operator [GBY_222] (rows=158394413 width=135) + Group By Operator [GBY_224] (rows=158394413 width=135) Output:["_col0"],keys:KEY._col0 <-Reducer 18 [SIMPLE_EDGE] SHUFFLE [RS_58] PartitionCols:_col0 Group By Operator [GBY_57] (rows=316788826 width=135) Output:["_col0"],keys:_col1 - Merge Join Operator [MERGEJOIN_176] (rows=316788826 width=135) - Conds:RS_221._col0=RS_194._col0(Inner),Output:["_col1"] + Merge Join Operator [MERGEJOIN_178] (rows=316788826 width=135) + Conds:RS_223._col0=RS_196._col0(Inner),Output:["_col1"] <-Map 13 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_194] + PARTITION_ONLY_SHUFFLE [RS_196] PartitionCols:_col0 - Select Operator [SEL_189] (rows=4058 width=1119) + Select Operator [SEL_191] (rows=4058 width=1119) Output:["_col0"] - Filter Operator [FIL_188] (rows=4058 width=1119) + Filter Operator [FIL_190] (rows=4058 width=1119) predicate:((d_year = 1999) and d_date_sk is not null and d_moy BETWEEN 1 AND 3) TableScan [TS_12] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] <-Map 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_221] + SHUFFLE [RS_223] PartitionCols:_col0 - Select Operator [SEL_220] (rows=287989836 width=135) + Select Operator 
[SEL_222] (rows=287989836 width=135) Output:["_col0","_col1"] - Filter Operator [FIL_219] (rows=287989836 width=135) + Filter Operator [FIL_221] (rows=287989836 width=135) predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_54_date_dim_d_date_sk_min) AND DynamicValue(RS_54_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_54_date_dim_d_date_sk_bloom_filter))) and cs_ship_customer_sk is not null and cs_sold_date_sk is not null) TableScan [TS_47] (rows=287989836 width=135) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_ship_customer_sk"] <-Reducer 20 [BROADCAST_EDGE] vectorized - BROADCAST [RS_218] - Group By Operator [GBY_217] (rows=1 width=12) + BROADCAST [RS_220] + Group By Operator [GBY_219] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_201] - Group By Operator [GBY_198] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_203] + Group By Operator [GBY_200] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_195] (rows=4058 width=1119) + Select Operator [SEL_197] (rows=4058 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_189] + Please refer to the previous Select Operator [SEL_191] <-Reducer 4 [ONE_TO_ONE_EDGE] FORWARD [RS_61] PartitionCols:_col0 @@ -189,42 +189,42 @@ Stage-0 Output:["_col0","_col6","_col7","_col8","_col9","_col10"] Filter Operator [FIL_45] (rows=696954748 width=88) predicate:_col12 is null - Merge Join Operator [MERGEJOIN_177] (rows=1393909496 width=88) - Conds:RS_41._col0=RS_42._col0(Left Semi),RS_41._col0=RS_216._col0(Left Outer),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col12"] + Merge Join Operator [MERGEJOIN_179] (rows=1393909496 width=88) + 
Conds:RS_41._col0=RS_42._col0(Left Semi),RS_41._col0=RS_218._col0(Left Outer),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col12"] <-Reducer 3 [SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_41] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_173] (rows=96800003 width=860) - Conds:RS_36._col1=RS_187._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10"] + Merge Join Operator [MERGEJOIN_175] (rows=96800003 width=860) + Conds:RS_36._col1=RS_189._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_187] + SHUFFLE [RS_189] PartitionCols:_col0 - Select Operator [SEL_186] (rows=1861800 width=385) + Select Operator [SEL_188] (rows=1861800 width=385) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_185] (rows=1861800 width=385) + Filter Operator [FIL_187] (rows=1861800 width=385) predicate:cd_demo_sk is not null TableScan [TS_6] (rows=1861800 width=385) default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status","cd_purchase_estimate","cd_credit_rating"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_36] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_172] (rows=88000001 width=860) - Conds:RS_181._col2=RS_184._col0(Inner),Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_174] (rows=88000001 width=860) + Conds:RS_183._col2=RS_186._col0(Inner),Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_181] + SHUFFLE [RS_183] PartitionCols:_col2 - Select Operator [SEL_180] (rows=80000000 width=860) + Select Operator [SEL_182] (rows=80000000 width=860) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_179] (rows=80000000 width=860) + Filter Operator [FIL_181] (rows=80000000 width=860) predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_customer_sk is not null) TableScan [TS_0] (rows=80000000 width=860) 
default@customer,c,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"] <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_184] + SHUFFLE [RS_186] PartitionCols:_col0 - Select Operator [SEL_183] (rows=20000000 width=1014) + Select Operator [SEL_185] (rows=20000000 width=1014) Output:["_col0"] - Filter Operator [FIL_182] (rows=20000000 width=1014) + Filter Operator [FIL_184] (rows=20000000 width=1014) predicate:((ca_state) IN ('CO', 'IL', 'MN') and ca_address_sk is not null) TableScan [TS_3] (rows=40000000 width=1014) default@customer_address,ca,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] @@ -235,35 +235,35 @@ Stage-0 Output:["_col0"],keys:_col0 Select Operator [SEL_18] (rows=633595212 width=88) Output:["_col0"] - Merge Join Operator [MERGEJOIN_174] (rows=633595212 width=88) - Conds:RS_208._col0=RS_190._col0(Inner),Output:["_col1"] + Merge Join Operator [MERGEJOIN_176] (rows=633595212 width=88) + Conds:RS_210._col0=RS_192._col0(Inner),Output:["_col1"] <-Map 13 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_190] + PARTITION_ONLY_SHUFFLE [RS_192] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_189] + Please refer to the previous Select Operator [SEL_191] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_208] + SHUFFLE [RS_210] PartitionCols:_col0 - Select Operator [SEL_207] (rows=575995635 width=88) + Select Operator [SEL_209] (rows=575995635 width=88) Output:["_col0","_col1"] - Filter Operator [FIL_206] (rows=575995635 width=88) + Filter Operator [FIL_208] (rows=575995635 width=88) predicate:((ss_customer_sk BETWEEN DynamicValue(RS_41_c_c_customer_sk_min) AND DynamicValue(RS_41_c_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_41_c_c_customer_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) and 
ss_customer_sk is not null and ss_sold_date_sk is not null) TableScan [TS_9] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_customer_sk"] <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_203] - Group By Operator [GBY_202] (rows=1 width=12) + BROADCAST [RS_205] + Group By Operator [GBY_204] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_199] - Group By Operator [GBY_196] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_201] + Group By Operator [GBY_198] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_191] (rows=4058 width=1119) + Select Operator [SEL_193] (rows=4058 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_189] + Please refer to the previous Select Operator [SEL_191] <-Reducer 8 [BROADCAST_EDGE] vectorized - BROADCAST [RS_205] - Group By Operator [GBY_204] (rows=1 width=12) + BROADCAST [RS_207] + Group By Operator [GBY_206] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=96800000)"] <-Reducer 3 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_137] @@ -271,43 +271,43 @@ Stage-0 Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=96800000)"] Select Operator [SEL_135] (rows=96800003 width=860) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_173] + Please refer to the previous Merge Join Operator [MERGEJOIN_175] <-Reducer 16 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_216] + FORWARD [RS_218] PartitionCols:_col0 - Select Operator [SEL_215] (rows=79201469 width=135) + Select Operator [SEL_217] 
(rows=79201469 width=135) Output:["_col0","_col1"] - Group By Operator [GBY_214] (rows=79201469 width=135) + Group By Operator [GBY_216] (rows=79201469 width=135) Output:["_col0"],keys:KEY._col0 <-Reducer 15 [SIMPLE_EDGE] SHUFFLE [RS_30] PartitionCols:_col0 Group By Operator [GBY_29] (rows=158402938 width=135) Output:["_col0"],keys:_col1 - Merge Join Operator [MERGEJOIN_175] (rows=158402938 width=135) - Conds:RS_213._col0=RS_192._col0(Inner),Output:["_col1"] + Merge Join Operator [MERGEJOIN_177] (rows=158402938 width=135) + Conds:RS_215._col0=RS_194._col0(Inner),Output:["_col1"] <-Map 13 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_192] + PARTITION_ONLY_SHUFFLE [RS_194] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_189] + Please refer to the previous Select Operator [SEL_191] <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_213] + SHUFFLE [RS_215] PartitionCols:_col0 - Select Operator [SEL_212] (rows=144002668 width=135) + Select Operator [SEL_214] (rows=144002668 width=135) Output:["_col0","_col1"] - Filter Operator [FIL_211] (rows=144002668 width=135) + Filter Operator [FIL_213] (rows=144002668 width=135) predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_26_date_dim_d_date_sk_min) AND DynamicValue(RS_26_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_26_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null) TableScan [TS_19] (rows=144002668 width=135) default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_210] - Group By Operator [GBY_209] (rows=1 width=12) + BROADCAST [RS_212] + Group By Operator [GBY_211] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_200] - Group By 
Operator [GBY_197] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_202] + Group By Operator [GBY_199] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_193] (rows=4058 width=1119) + Select Operator [SEL_195] (rows=4058 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_189] + Please refer to the previous Select Operator [SEL_191] diff --git a/ql/src/test/results/clientpositive/perf/tez/query72.q.out b/ql/src/test/results/clientpositive/perf/tez/query72.q.out index 48682e340d..65a60ea30a 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query72.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query72.q.out @@ -86,14 +86,14 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_311] - Limit [LIM_310] (rows=100 width=135) + File Output Operator [FS_315] + Limit [LIM_314] (rows=100 width=135) Number of rows:100 - Select Operator [SEL_309] (rows=37725837 width=135) + Select Operator [SEL_313] (rows=37725837 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_308] - Group By Operator [GBY_307] (rows=37725837 width=135) + SHUFFLE [RS_312] + Group By Operator [GBY_311] (rows=37725837 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_69] @@ -102,14 +102,14 @@ Stage-0 Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(_col3)","count(_col4)","count()"],keys:_col0, _col1, _col2 Select Operator [SEL_66] (rows=75451675 width=135) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_247] (rows=75451675 width=135) - Conds:RS_63._col4, _col6=RS_306._col0, _col1(Left Outer),Output:["_col13","_col15","_col22","_col28"] + 
Merge Join Operator [MERGEJOIN_251] (rows=75451675 width=135) + Conds:RS_63._col4, _col6=RS_310._col0, _col1(Left Outer),Output:["_col13","_col15","_col22","_col28"] <-Map 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_306] + SHUFFLE [RS_310] PartitionCols:_col0, _col1 - Select Operator [SEL_305] (rows=28798881 width=106) + Select Operator [SEL_309] (rows=28798881 width=106) Output:["_col0","_col1"] - Filter Operator [FIL_304] (rows=28798881 width=106) + Filter Operator [FIL_308] (rows=28798881 width=106) predicate:cr_item_sk is not null TableScan [TS_60] (rows=28798881 width=106) default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_item_sk","cr_order_number"] @@ -118,14 +118,14 @@ Stage-0 PartitionCols:_col4, _col6 Select Operator [SEL_59] (rows=68592431 width=135) Output:["_col4","_col6","_col13","_col15","_col22","_col28"] - Merge Join Operator [MERGEJOIN_246] (rows=68592431 width=135) - Conds:RS_56._col0, _col20=RS_303._col0, _col1(Inner),Output:["_col5","_col9","_col14","_col16","_col20","_col26"] + Merge Join Operator [MERGEJOIN_250] (rows=68592431 width=135) + Conds:RS_56._col0, _col20=RS_307._col0, _col1(Inner),Output:["_col5","_col9","_col14","_col16","_col20","_col26"] <-Map 28 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_303] + SHUFFLE [RS_307] PartitionCols:_col0, _col1 - Select Operator [SEL_302] (rows=73049 width=1119) + Select Operator [SEL_306] (rows=73049 width=1119) Output:["_col0","_col1"] - Filter Operator [FIL_301] (rows=73049 width=1119) + Filter Operator [FIL_305] (rows=73049 width=1119) predicate:(d_date_sk is not null and d_week_seq is not null) TableScan [TS_46] (rows=73049 width=1119) default@date_dim,d2,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_week_seq"] @@ -134,28 +134,28 @@ Stage-0 PartitionCols:_col0, _col20 Filter Operator [FIL_55] (rows=62356755 width=135) predicate:(_col3 < _col17) - Merge Join Operator [MERGEJOIN_245] (rows=187070265 width=135) + Merge Join Operator [MERGEJOIN_249] (rows=187070265 width=135) 
Conds:RS_52._col1=RS_53._col8(Inner),Output:["_col0","_col3","_col5","_col9","_col14","_col16","_col17","_col20","_col26"] <-Reducer 2 [SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_52] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_238] (rows=41342400 width=15) - Conds:RS_250._col2=RS_253._col0(Inner),Output:["_col0","_col1","_col3","_col5"] + Merge Join Operator [MERGEJOIN_242] (rows=41342400 width=15) + Conds:RS_254._col2=RS_257._col0(Inner),Output:["_col0","_col1","_col3","_col5"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_250] + SHUFFLE [RS_254] PartitionCols:_col2 - Select Operator [SEL_249] (rows=37584000 width=15) + Select Operator [SEL_253] (rows=37584000 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_248] (rows=37584000 width=15) + Filter Operator [FIL_252] (rows=37584000 width=15) predicate:(inv_date_sk is not null and inv_item_sk is not null and inv_warehouse_sk is not null) TableScan [TS_0] (rows=37584000 width=15) default@inventory,inventory,Tbl:COMPLETE,Col:NONE,Output:["inv_date_sk","inv_item_sk","inv_warehouse_sk","inv_quantity_on_hand"] <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_253] + SHUFFLE [RS_257] PartitionCols:_col0 - Select Operator [SEL_252] (rows=27 width=1029) + Select Operator [SEL_256] (rows=27 width=1029) Output:["_col0","_col1"] - Filter Operator [FIL_251] (rows=27 width=1029) + Filter Operator [FIL_255] (rows=27 width=1029) predicate:w_warehouse_sk is not null TableScan [TS_3] (rows=27 width=1029) default@warehouse,warehouse,Tbl:COMPLETE,Col:NONE,Output:["w_warehouse_sk","w_warehouse_name"] @@ -166,152 +166,152 @@ Stage-0 Output:["_col3","_col8","_col10","_col11","_col14","_col20"] Filter Operator [FIL_44] (rows=170063874 width=135) predicate:(UDFToDouble(_col20) > (UDFToDouble(_col9) + 5.0D)) - Merge Join Operator [MERGEJOIN_244] (rows=510191624 width=135) - Conds:RS_41._col1=RS_290._col0(Inner),Output:["_col4","_col6","_col7","_col9","_col10","_col16","_col18","_col20"] + Merge Join Operator 
[MERGEJOIN_248] (rows=510191624 width=135) + Conds:RS_41._col1=RS_294._col0(Inner),Output:["_col4","_col6","_col7","_col9","_col10","_col16","_col18","_col20"] <-Map 26 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_290] + PARTITION_ONLY_SHUFFLE [RS_294] PartitionCols:_col0 - Select Operator [SEL_289] (rows=73049 width=1119) + Select Operator [SEL_293] (rows=73049 width=1119) Output:["_col0","_col1"] - Filter Operator [FIL_288] (rows=73049 width=1119) + Filter Operator [FIL_292] (rows=73049 width=1119) predicate:d_date_sk is not null TableScan [TS_23] (rows=73049 width=1119) default@date_dim,d3,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] <-Reducer 15 [SIMPLE_EDGE] SHUFFLE [RS_41] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_243] (rows=463810558 width=135) - Conds:RS_38._col4=RS_280._col0(Inner),Output:["_col1","_col4","_col6","_col7","_col9","_col10","_col16","_col18"] + Merge Join Operator [MERGEJOIN_247] (rows=463810558 width=135) + Conds:RS_38._col4=RS_284._col0(Inner),Output:["_col1","_col4","_col6","_col7","_col9","_col10","_col16","_col18"] <-Map 24 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_280] + PARTITION_ONLY_SHUFFLE [RS_284] PartitionCols:_col0 - Select Operator [SEL_279] (rows=462000 width=1436) + Select Operator [SEL_283] (rows=462000 width=1436) Output:["_col0","_col1"] - Filter Operator [FIL_278] (rows=462000 width=1436) + Filter Operator [FIL_282] (rows=462000 width=1436) predicate:i_item_sk is not null TableScan [TS_20] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_desc"] <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_38] PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_242] (rows=421645953 width=135) - Conds:RS_35._col5=RS_300._col0(Left Outer),Output:["_col1","_col4","_col6","_col7","_col9","_col10","_col16"] + Merge Join Operator [MERGEJOIN_246] (rows=421645953 width=135) + Conds:RS_35._col5=RS_304._col0(Left 
Outer),Output:["_col1","_col4","_col6","_col7","_col9","_col10","_col16"] <-Map 23 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_300] + SHUFFLE [RS_304] PartitionCols:_col0 - Select Operator [SEL_299] (rows=2300 width=1179) + Select Operator [SEL_303] (rows=2300 width=1179) Output:["_col0"] TableScan [TS_18] (rows=2300 width=1179) default@promotion,promotion,Tbl:COMPLETE,Col:NONE,Output:["p_promo_sk"] <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_35] PartitionCols:_col5 - Merge Join Operator [MERGEJOIN_241] (rows=383314495 width=135) - Conds:RS_32._col3=RS_272._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col9","_col10"] + Merge Join Operator [MERGEJOIN_245] (rows=383314495 width=135) + Conds:RS_32._col3=RS_276._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col9","_col10"] <-Map 21 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_272] + PARTITION_ONLY_SHUFFLE [RS_276] PartitionCols:_col0 - Select Operator [SEL_271] (rows=3600 width=107) + Select Operator [SEL_275] (rows=3600 width=107) Output:["_col0"] - Filter Operator [FIL_270] (rows=3600 width=107) + Filter Operator [FIL_274] (rows=3600 width=107) predicate:((hd_buy_potential = '1001-5000') and hd_demo_sk is not null) TableScan [TS_15] (rows=7200 width=107) default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_buy_potential"] <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_32] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_240] (rows=348467716 width=135) - Conds:RS_29._col2=RS_264._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col9","_col10"] + Merge Join Operator [MERGEJOIN_244] (rows=348467716 width=135) + Conds:RS_29._col2=RS_268._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col9","_col10"] <-Map 19 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_264] + PARTITION_ONLY_SHUFFLE [RS_268] PartitionCols:_col0 - Select Operator [SEL_263] (rows=930900 width=385) + Select Operator [SEL_267] 
(rows=930900 width=385) Output:["_col0"] - Filter Operator [FIL_262] (rows=930900 width=385) + Filter Operator [FIL_266] (rows=930900 width=385) predicate:((cd_marital_status = 'M') and cd_demo_sk is not null) TableScan [TS_12] (rows=1861800 width=385) default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status"] <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_29] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_239] (rows=316788826 width=135) - Conds:RS_298._col0=RS_256._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10"] + Merge Join Operator [MERGEJOIN_243] (rows=316788826 width=135) + Conds:RS_302._col0=RS_260._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10"] <-Map 17 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_256] + PARTITION_ONLY_SHUFFLE [RS_260] PartitionCols:_col0 - Select Operator [SEL_255] (rows=36524 width=1119) + Select Operator [SEL_259] (rows=36524 width=1119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_254] (rows=36524 width=1119) + Filter Operator [FIL_258] (rows=36524 width=1119) predicate:((d_year = 2001) and d_date_sk is not null and d_week_seq is not null) TableScan [TS_9] (rows=73049 width=1119) default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date","d_week_seq","d_year"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_298] + SHUFFLE [RS_302] PartitionCols:_col0 - Select Operator [SEL_297] (rows=287989836 width=135) + Select Operator [SEL_301] (rows=287989836 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_296] (rows=287989836 width=135) + Filter Operator [FIL_300] (rows=287989836 width=135) predicate:((cs_bill_cdemo_sk BETWEEN DynamicValue(RS_30_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_30_customer_demographics_cd_demo_sk_max) and in_bloom_filter(cs_bill_cdemo_sk, 
DynamicValue(RS_30_customer_demographics_cd_demo_sk_bloom_filter))) and (cs_bill_hdemo_sk BETWEEN DynamicValue(RS_33_household_demographics_hd_demo_sk_min) AND DynamicValue(RS_33_household_demographics_hd_demo_sk_max) and in_bloom_filter(cs_bill_hdemo_sk, DynamicValue(RS_33_household_demographics_hd_demo_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_39_item_i_item_sk_min) AND DynamicValue(RS_39_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_39_item_i_item_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_52_inventory_inv_item_sk_min) AND DynamicValue(RS_52_inventory_inv_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_52_inventory_inv_item_sk_bloom_filter))) and (cs_ship_date_sk BETWEEN DynamicValue(RS_42_d3_d_date_sk_min) AND DynamicValue(RS_42_d3_d_date_sk_max) and in_bloom_filter(cs_ship_date_sk, DynamicValue(RS_42_d3_d_date_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_27_d1_d_date_sk_min) AND DynamicValue(RS_27_d1_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_27_d1_d_date_sk_bloom_filter))) and cs_bill_cdemo_sk is not null and cs_bill_hdemo_sk is not null and cs_item_sk is not null and cs_ship_date_sk is not null and cs_sold_date_sk is not null) TableScan [TS_6] (rows=287989836 width=135) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_ship_date_sk","cs_bill_cdemo_sk","cs_bill_hdemo_sk","cs_item_sk","cs_promo_sk","cs_order_number","cs_quantity"] <-Reducer 18 [BROADCAST_EDGE] vectorized - BROADCAST [RS_261] - Group By Operator [GBY_260] (rows=1 width=12) + BROADCAST [RS_265] + Group By Operator [GBY_264] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_259] - Group By Operator [GBY_258] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_263] + Group By 
Operator [GBY_262] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_257] (rows=36524 width=1119) + Select Operator [SEL_261] (rows=36524 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_255] + Please refer to the previous Select Operator [SEL_259] <-Reducer 20 [BROADCAST_EDGE] vectorized - BROADCAST [RS_269] - Group By Operator [GBY_268] (rows=1 width=12) + BROADCAST [RS_273] + Group By Operator [GBY_272] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 19 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_267] - Group By Operator [GBY_266] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_271] + Group By Operator [GBY_270] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_265] (rows=930900 width=385) + Select Operator [SEL_269] (rows=930900 width=385) Output:["_col0"] - Please refer to the previous Select Operator [SEL_263] + Please refer to the previous Select Operator [SEL_267] <-Reducer 22 [BROADCAST_EDGE] vectorized - BROADCAST [RS_277] - Group By Operator [GBY_276] (rows=1 width=12) + BROADCAST [RS_281] + Group By Operator [GBY_280] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 21 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_275] - Group By Operator [GBY_274] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_279] + Group By Operator [GBY_278] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_273] (rows=3600 width=107) + Select Operator [SEL_277] 
(rows=3600 width=107) Output:["_col0"] - Please refer to the previous Select Operator [SEL_271] + Please refer to the previous Select Operator [SEL_275] <-Reducer 25 [BROADCAST_EDGE] vectorized - BROADCAST [RS_285] - Group By Operator [GBY_284] (rows=1 width=12) + BROADCAST [RS_289] + Group By Operator [GBY_288] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 24 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_283] - Group By Operator [GBY_282] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_287] + Group By Operator [GBY_286] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_281] (rows=462000 width=1436) + Select Operator [SEL_285] (rows=462000 width=1436) Output:["_col0"] - Please refer to the previous Select Operator [SEL_279] + Please refer to the previous Select Operator [SEL_283] <-Reducer 27 [BROADCAST_EDGE] vectorized - BROADCAST [RS_295] - Group By Operator [GBY_294] (rows=1 width=12) + BROADCAST [RS_299] + Group By Operator [GBY_298] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 26 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_293] - Group By Operator [GBY_292] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_297] + Group By Operator [GBY_296] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_291] (rows=73049 width=1119) + Select Operator [SEL_295] (rows=73049 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_289] + Please refer to the previous Select Operator [SEL_293] <-Reducer 8 [BROADCAST_EDGE] vectorized - BROADCAST [RS_287] - Group By Operator 
[GBY_286] (rows=1 width=12) + BROADCAST [RS_291] + Group By Operator [GBY_290] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=41342400)"] <-Reducer 2 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_174] @@ -319,5 +319,5 @@ Stage-0 Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=41342400)"] Select Operator [SEL_172] (rows=41342400 width=15) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_238] + Please refer to the previous Merge Join Operator [MERGEJOIN_242] diff --git a/ql/src/test/results/clientpositive/perf/tez/query77.q.out b/ql/src/test/results/clientpositive/perf/tez/query77.q.out index 163805b2a3..915d4fdb80 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query77.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query77.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[307][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 16' is a cross product +Warning: Shuffle Join MERGEJOIN[315][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 16' is a cross product PREHOOK: query: explain with ss as (select s_store_sk, @@ -249,296 +249,296 @@ Stage-0 limit:100 Stage-1 Reducer 8 vectorized - File Output Operator [FS_360] - Limit [LIM_359] (rows=100 width=163) + File Output Operator [FS_368] + Limit [LIM_367] (rows=100 width=163) Number of rows:100 - Select Operator [SEL_358] (rows=956329968 width=163) + Select Operator [SEL_366] (rows=956329968 width=163) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_357] - Select Operator [SEL_356] (rows=956329968 width=163) + SHUFFLE [RS_365] + Select Operator [SEL_364] (rows=956329968 width=163) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_355] (rows=956329968 width=163) + Group By Operator [GBY_363] (rows=956329968 width=163) 
Output:["_col0","_col1","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Union 6 [SIMPLE_EDGE] <-Reducer 16 [CONTAINS] - Reduce Output Operator [RS_311] + Reduce Output Operator [RS_319] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_310] (rows=1912659936 width=163) + Group By Operator [GBY_318] (rows=1912659936 width=163) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Select Operator [SEL_308] (rows=158394413 width=360) + Select Operator [SEL_316] (rows=158394413 width=360) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_307] (rows=158394413 width=360) + Merge Join Operator [MERGEJOIN_315] (rows=158394413 width=360) Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 15 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_367] - Group By Operator [GBY_366] (rows=158394413 width=135) + PARTITION_ONLY_SHUFFLE [RS_375] + Group By Operator [GBY_374] (rows=158394413 width=135) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_55] PartitionCols:_col0 Group By Operator [GBY_54] (rows=316788826 width=135) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","sum(_col3)"],keys:_col1 - Merge Join Operator [MERGEJOIN_293] (rows=316788826 width=135) - Conds:RS_365._col0=RS_322._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_301] (rows=316788826 width=135) + Conds:RS_373._col0=RS_330._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_322] + SHUFFLE [RS_330] PartitionCols:_col0 - Select Operator [SEL_318] (rows=8116 width=1119) + Select Operator [SEL_326] (rows=8116 width=1119) Output:["_col0"] - Filter Operator [FIL_317] (rows=8116 width=1119) + Filter Operator 
[FIL_325] (rows=8116 width=1119) predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-08-04 00:00:00' AND TIMESTAMP'1998-09-03 00:00:00' and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] <-Map 31 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_365] + SHUFFLE [RS_373] PartitionCols:_col0 - Select Operator [SEL_364] (rows=287989836 width=135) + Select Operator [SEL_372] (rows=287989836 width=135) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_363] (rows=287989836 width=135) + Filter Operator [FIL_371] (rows=287989836 width=135) predicate:((cs_sold_date_sk BETWEEN DynamicValue(RS_51_date_dim_d_date_sk_min) AND DynamicValue(RS_51_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_51_date_dim_d_date_sk_bloom_filter))) and cs_sold_date_sk is not null) TableScan [TS_44] (rows=287989836 width=135) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_call_center_sk","cs_ext_sales_price","cs_net_profit"] <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_362] - Group By Operator [GBY_361] (rows=1 width=12) + BROADCAST [RS_370] + Group By Operator [GBY_369] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_332] - Group By Operator [GBY_329] (rows=1 width=12) + SHUFFLE [RS_340] + Group By Operator [GBY_337] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_323] (rows=8116 width=1119) + Select Operator [SEL_331] (rows=8116 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_318] + Please refer to the previous Select Operator [SEL_326] <-Reducer 19 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE 
[RS_372] - Group By Operator [GBY_371] (rows=1 width=224) + PARTITION_ONLY_SHUFFLE [RS_380] + Group By Operator [GBY_379] (rows=1 width=224) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"] <-Reducer 18 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_69] Group By Operator [GBY_68] (rows=1 width=224) Output:["_col0","_col1"],aggregations:["sum(_col1)","sum(_col2)"] - Merge Join Operator [MERGEJOIN_294] (rows=31678769 width=106) - Conds:RS_370._col0=RS_324._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_302] (rows=31678769 width=106) + Conds:RS_378._col0=RS_332._col0(Inner),Output:["_col1","_col2"] <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_324] + SHUFFLE [RS_332] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_318] + Please refer to the previous Select Operator [SEL_326] <-Map 32 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_370] + SHUFFLE [RS_378] PartitionCols:_col0 - Select Operator [SEL_369] (rows=28798881 width=106) + Select Operator [SEL_377] (rows=28798881 width=106) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_368] (rows=28798881 width=106) + Filter Operator [FIL_376] (rows=28798881 width=106) predicate:cr_returned_date_sk is not null TableScan [TS_58] (rows=28798881 width=106) default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_returned_date_sk","cr_return_amount","cr_net_loss"] <-Reducer 23 [CONTAINS] - Reduce Output Operator [RS_316] + Reduce Output Operator [RS_324] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_315] (rows=1912659936 width=163) + Group By Operator [GBY_323] (rows=1912659936 width=163) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Select Operator [SEL_313] (rows=95833780 width=135) + Select Operator [SEL_321] (rows=95833780 width=135) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_312] (rows=95833780 
width=135) - Conds:RS_388._col0=RS_393._col0(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5"] + Merge Join Operator [MERGEJOIN_320] (rows=95833780 width=135) + Conds:RS_396._col0=RS_401._col0(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5"] <-Reducer 22 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_388] + FORWARD [RS_396] PartitionCols:_col0 - Group By Operator [GBY_387] (rows=87121617 width=135) + Group By Operator [GBY_395] (rows=87121617 width=135) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 <-Reducer 21 [SIMPLE_EDGE] SHUFFLE [RS_94] PartitionCols:_col0 Group By Operator [GBY_93] (rows=174243235 width=135) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","sum(_col3)"],keys:_col6 - Merge Join Operator [MERGEJOIN_296] (rows=174243235 width=135) - Conds:RS_89._col1=RS_377._col0(Inner),Output:["_col2","_col3","_col6"] + Merge Join Operator [MERGEJOIN_304] (rows=174243235 width=135) + Conds:RS_89._col1=RS_385._col0(Inner),Output:["_col2","_col3","_col6"] <-Map 34 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_377] + PARTITION_ONLY_SHUFFLE [RS_385] PartitionCols:_col0 - Select Operator [SEL_376] (rows=4602 width=585) + Select Operator [SEL_384] (rows=4602 width=585) Output:["_col0"] - Filter Operator [FIL_375] (rows=4602 width=585) + Filter Operator [FIL_383] (rows=4602 width=585) predicate:wp_web_page_sk is not null TableScan [TS_83] (rows=4602 width=585) default@web_page,web_page,Tbl:COMPLETE,Col:NONE,Output:["wp_web_page_sk"] <-Reducer 20 [SIMPLE_EDGE] SHUFFLE [RS_89] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_295] (rows=158402938 width=135) - Conds:RS_386._col0=RS_325._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_303] (rows=158402938 width=135) + Conds:RS_394._col0=RS_333._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_325] + SHUFFLE [RS_333] PartitionCols:_col0 - Please refer to the 
previous Select Operator [SEL_318] + Please refer to the previous Select Operator [SEL_326] <-Map 33 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_386] + SHUFFLE [RS_394] PartitionCols:_col0 - Select Operator [SEL_385] (rows=144002668 width=135) + Select Operator [SEL_393] (rows=144002668 width=135) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_384] (rows=144002668 width=135) + Filter Operator [FIL_392] (rows=144002668 width=135) predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_87_date_dim_d_date_sk_min) AND DynamicValue(RS_87_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_87_date_dim_d_date_sk_bloom_filter))) and (ws_web_page_sk BETWEEN DynamicValue(RS_90_web_page_wp_web_page_sk_min) AND DynamicValue(RS_90_web_page_wp_web_page_sk_max) and in_bloom_filter(ws_web_page_sk, DynamicValue(RS_90_web_page_wp_web_page_sk_bloom_filter))) and ws_sold_date_sk is not null and ws_web_page_sk is not null) TableScan [TS_77] (rows=144002668 width=135) default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_web_page_sk","ws_ext_sales_price","ws_net_profit"] <-Reducer 24 [BROADCAST_EDGE] vectorized - BROADCAST [RS_374] - Group By Operator [GBY_373] (rows=1 width=12) + BROADCAST [RS_382] + Group By Operator [GBY_381] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_333] - Group By Operator [GBY_330] (rows=1 width=12) + SHUFFLE [RS_341] + Group By Operator [GBY_338] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_326] (rows=8116 width=1119) + Select Operator [SEL_334] (rows=8116 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_318] + Please refer to the previous Select Operator [SEL_326] <-Reducer 35 
[BROADCAST_EDGE] vectorized - BROADCAST [RS_383] - Group By Operator [GBY_382] (rows=1 width=12) + BROADCAST [RS_391] + Group By Operator [GBY_390] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 34 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_381] - Group By Operator [GBY_380] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_389] + Group By Operator [GBY_388] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_378] (rows=4602 width=585) + Select Operator [SEL_386] (rows=4602 width=585) Output:["_col0"] - Please refer to the previous Select Operator [SEL_376] + Please refer to the previous Select Operator [SEL_384] <-Reducer 27 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_393] + FORWARD [RS_401] PartitionCols:_col0 - Group By Operator [GBY_392] (rows=8711072 width=92) + Group By Operator [GBY_400] (rows=8711072 width=92) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 <-Reducer 26 [SIMPLE_EDGE] SHUFFLE [RS_114] PartitionCols:_col0 Group By Operator [GBY_113] (rows=17422145 width=92) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","sum(_col3)"],keys:_col6 - Merge Join Operator [MERGEJOIN_298] (rows=17422145 width=92) - Conds:RS_109._col1=RS_379._col0(Inner),Output:["_col2","_col3","_col6"] + Merge Join Operator [MERGEJOIN_306] (rows=17422145 width=92) + Conds:RS_109._col1=RS_387._col0(Inner),Output:["_col2","_col3","_col6"] <-Map 34 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_379] + PARTITION_ONLY_SHUFFLE [RS_387] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_376] + Please refer to the previous Select Operator [SEL_384] <-Reducer 25 [SIMPLE_EDGE] SHUFFLE [RS_109] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_297] (rows=15838314 
width=92) - Conds:RS_391._col0=RS_327._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_305] (rows=15838314 width=92) + Conds:RS_399._col0=RS_335._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_327] + SHUFFLE [RS_335] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_318] + Please refer to the previous Select Operator [SEL_326] <-Map 36 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_391] + SHUFFLE [RS_399] PartitionCols:_col0 - Select Operator [SEL_390] (rows=14398467 width=92) + Select Operator [SEL_398] (rows=14398467 width=92) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_389] (rows=14398467 width=92) + Filter Operator [FIL_397] (rows=14398467 width=92) predicate:(wr_returned_date_sk is not null and wr_web_page_sk is not null) TableScan [TS_97] (rows=14398467 width=92) default@web_returns,web_returns,Tbl:COMPLETE,Col:NONE,Output:["wr_returned_date_sk","wr_web_page_sk","wr_return_amt","wr_net_loss"] <-Reducer 5 [CONTAINS] - Reduce Output Operator [RS_306] + Reduce Output Operator [RS_314] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_305] (rows=1912659936 width=163) + Group By Operator [GBY_313] (rows=1912659936 width=163) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Select Operator [SEL_303] (rows=383325119 width=88) + Select Operator [SEL_311] (rows=383325119 width=88) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_302] (rows=383325119 width=88) - Conds:RS_349._col0=RS_354._col0(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5"] + Merge Join Operator [MERGEJOIN_310] (rows=383325119 width=88) + Conds:RS_357._col0=RS_362._col0(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5"] <-Reducer 13 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_354] + FORWARD [RS_362] PartitionCols:_col0 - Group By Operator [GBY_353] 
(rows=34842647 width=77) + Group By Operator [GBY_361] (rows=34842647 width=77) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_37] PartitionCols:_col0 Group By Operator [GBY_36] (rows=69685294 width=77) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","sum(_col3)"],keys:_col6 - Merge Join Operator [MERGEJOIN_292] (rows=69685294 width=77) - Conds:RS_32._col1=RS_340._col0(Inner),Output:["_col2","_col3","_col6"] + Merge Join Operator [MERGEJOIN_300] (rows=69685294 width=77) + Conds:RS_32._col1=RS_348._col0(Inner),Output:["_col2","_col3","_col6"] <-Map 28 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_340] + SHUFFLE [RS_348] PartitionCols:_col0 - Select Operator [SEL_337] (rows=1704 width=1910) + Select Operator [SEL_345] (rows=1704 width=1910) Output:["_col0"] - Filter Operator [FIL_336] (rows=1704 width=1910) + Filter Operator [FIL_344] (rows=1704 width=1910) predicate:s_store_sk is not null TableScan [TS_6] (rows=1704 width=1910) default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk"] <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_32] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_291] (rows=63350266 width=77) - Conds:RS_352._col0=RS_321._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_299] (rows=63350266 width=77) + Conds:RS_360._col0=RS_329._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_321] + SHUFFLE [RS_329] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_318] + Please refer to the previous Select Operator [SEL_326] <-Map 30 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_352] + SHUFFLE [RS_360] PartitionCols:_col0 - Select Operator [SEL_351] (rows=57591150 width=77) + Select Operator [SEL_359] (rows=57591150 width=77) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_350] (rows=57591150 width=77) + Filter Operator [FIL_358] (rows=57591150 width=77) 
predicate:(sr_returned_date_sk is not null and sr_store_sk is not null) TableScan [TS_20] (rows=57591150 width=77) default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_returned_date_sk","sr_store_sk","sr_return_amt","sr_net_loss"] <-Reducer 4 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_349] + FORWARD [RS_357] PartitionCols:_col0 - Group By Operator [GBY_348] (rows=348477374 width=88) + Group By Operator [GBY_356] (rows=348477374 width=88) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_17] PartitionCols:_col0 Group By Operator [GBY_16] (rows=696954748 width=88) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","sum(_col3)"],keys:_col6 - Merge Join Operator [MERGEJOIN_290] (rows=696954748 width=88) - Conds:RS_12._col1=RS_338._col0(Inner),Output:["_col2","_col3","_col6"] + Merge Join Operator [MERGEJOIN_298] (rows=696954748 width=88) + Conds:RS_12._col1=RS_346._col0(Inner),Output:["_col2","_col3","_col6"] <-Map 28 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_338] + SHUFFLE [RS_346] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_337] + Please refer to the previous Select Operator [SEL_345] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_12] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_289] (rows=633595212 width=88) - Conds:RS_347._col0=RS_319._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_297] (rows=633595212 width=88) + Conds:RS_355._col0=RS_327._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_319] + SHUFFLE [RS_327] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_318] + Please refer to the previous Select Operator [SEL_326] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_347] + SHUFFLE [RS_355] PartitionCols:_col0 - Select Operator [SEL_346] (rows=575995635 width=88) + Select Operator [SEL_354] (rows=575995635 width=88) 
Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_345] (rows=575995635 width=88) + Filter Operator [FIL_353] (rows=575995635 width=88) predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_13_store_s_store_sk_min) AND DynamicValue(RS_13_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_13_store_s_store_sk_bloom_filter))) and ss_sold_date_sk is not null and ss_store_sk is not null) TableScan [TS_0] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_store_sk","ss_ext_sales_price","ss_net_profit"] <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_335] - Group By Operator [GBY_334] (rows=1 width=12) + BROADCAST [RS_343] + Group By Operator [GBY_342] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_331] - Group By Operator [GBY_328] (rows=1 width=12) + SHUFFLE [RS_339] + Group By Operator [GBY_336] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_320] (rows=8116 width=1119) + Select Operator [SEL_328] (rows=8116 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_318] + Please refer to the previous Select Operator [SEL_326] <-Reducer 29 [BROADCAST_EDGE] vectorized - BROADCAST [RS_344] - Group By Operator [GBY_343] (rows=1 width=12) + BROADCAST [RS_352] + Group By Operator [GBY_351] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 28 [CUSTOM_SIMPLE_EDGE] 
vectorized - SHUFFLE [RS_342] - Group By Operator [GBY_341] (rows=1 width=12) + SHUFFLE [RS_350] + Group By Operator [GBY_349] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_339] (rows=1704 width=1910) + Select Operator [SEL_347] (rows=1704 width=1910) Output:["_col0"] - Please refer to the previous Select Operator [SEL_337] + Please refer to the previous Select Operator [SEL_345] diff --git a/ql/src/test/results/clientpositive/perf/tez/query78.q.out b/ql/src/test/results/clientpositive/perf/tez/query78.q.out index 90b6f17e1d..b1102603c7 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query78.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query78.q.out @@ -139,10 +139,10 @@ Stage-0 limit:100 Stage-1 Reducer 6 vectorized - File Output Operator [FS_235] - Limit [LIM_234] (rows=100 width=88) + File Output Operator [FS_238] + Limit [LIM_237] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_233] (rows=23425424 width=88) + Select Operator [SEL_236] (rows=23425424 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_73] @@ -150,28 +150,28 @@ Stage-0 Output:["_col0","_col1","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] Filter Operator [FIL_71] (rows=23425424 width=88) predicate:(COALESCE(_col11,0) > 0) - Merge Join Operator [MERGEJOIN_188] (rows=70276272 width=88) - Conds:RS_68._col1=RS_232._col0(Left Outer),Output:["_col0","_col1","_col2","_col3","_col4","_col7","_col8","_col9","_col11","_col12","_col13"] + Merge Join Operator [MERGEJOIN_191] (rows=70276272 width=88) + Conds:RS_68._col1=RS_235._col0(Left Outer),Output:["_col0","_col1","_col2","_col3","_col4","_col7","_col8","_col9","_col11","_col12","_col13"] <-Reducer 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_232] + SHUFFLE [RS_235] PartitionCols:_col0 - Select Operator [SEL_231] 
(rows=43558464 width=135) + Select Operator [SEL_234] (rows=43558464 width=135) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_230] (rows=43558464 width=135) + Group By Operator [GBY_233] (rows=43558464 width=135) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1 <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_65] PartitionCols:_col0, _col1 Group By Operator [GBY_64] (rows=87116928 width=135) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col3, _col4 - Merge Join Operator [MERGEJOIN_186] (rows=87116928 width=135) - Conds:RS_195._col0=RS_61._col0(Inner),Output:["_col3","_col4","_col6","_col7","_col8"] + Merge Join Operator [MERGEJOIN_189] (rows=87116928 width=135) + Conds:RS_198._col0=RS_61._col0(Inner),Output:["_col3","_col4","_col6","_col7","_col8"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_195] + SHUFFLE [RS_198] PartitionCols:_col0 - Select Operator [SEL_190] (rows=36524 width=1119) + Select Operator [SEL_193] (rows=36524 width=1119) Output:["_col0"] - Filter Operator [FIL_189] (rows=36524 width=1119) + Filter Operator [FIL_192] (rows=36524 width=1119) predicate:((d_year = 2000) and d_date_sk is not null) TableScan [TS_0] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] @@ -182,32 +182,32 @@ Stage-0 Output:["_col0","_col1","_col2","_col4","_col5","_col6"] Filter Operator [FIL_58] (rows=79197206 width=135) predicate:_col8 is null - Merge Join Operator [MERGEJOIN_185] (rows=158394413 width=135) - Conds:RS_227._col2, _col3=RS_229._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5","_col6","_col8"] + Merge Join Operator [MERGEJOIN_188] (rows=158394413 width=135) + Conds:RS_230._col2, _col3=RS_232._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5","_col6","_col8"] <-Map 20 [SIMPLE_EDGE] vectorized - 
SHUFFLE [RS_227] + SHUFFLE [RS_230] PartitionCols:_col2, _col3 - Select Operator [SEL_226] (rows=143994918 width=135) + Select Operator [SEL_229] (rows=143994918 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_225] (rows=143994918 width=135) + Filter Operator [FIL_228] (rows=143994918 width=135) predicate:((cs_item_sk = cs_item_sk) and (cs_sold_date_sk BETWEEN DynamicValue(RS_60_date_dim_d_date_sk_min) AND DynamicValue(RS_60_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_60_date_dim_d_date_sk_bloom_filter))) and cs_sold_date_sk is not null) TableScan [TS_50] (rows=287989836 width=135) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_order_number","cs_quantity","cs_wholesale_cost","cs_sales_price"] <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_224] - Group By Operator [GBY_223] (rows=1 width=12) + BROADCAST [RS_227] + Group By Operator [GBY_226] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_202] - Group By Operator [GBY_199] (rows=1 width=12) + SHUFFLE [RS_205] + Group By Operator [GBY_202] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_196] (rows=36524 width=1119) + Select Operator [SEL_199] (rows=36524 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_190] + Please refer to the previous Select Operator [SEL_193] <-Map 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_229] + SHUFFLE [RS_232] PartitionCols:_col0, _col1 - Select Operator [SEL_228] (rows=28798881 width=106) + Select Operator [SEL_231] (rows=28798881 width=106) Output:["_col0","_col1"] TableScan [TS_53] (rows=28798881 width=106) 
default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_item_sk","cr_order_number"] @@ -216,26 +216,26 @@ Stage-0 PartitionCols:_col1 Filter Operator [FIL_45] (rows=63887519 width=88) predicate:(COALESCE(_col7,0) > 0) - Merge Join Operator [MERGEJOIN_187] (rows=191662559 width=88) - Conds:RS_212._col1, _col0=RS_222._col1, _col0(Left Outer),Output:["_col0","_col1","_col2","_col3","_col4","_col7","_col8","_col9"] + Merge Join Operator [MERGEJOIN_190] (rows=191662559 width=88) + Conds:RS_215._col1, _col0=RS_225._col1, _col0(Left Outer),Output:["_col0","_col1","_col2","_col3","_col4","_col7","_col8","_col9"] <-Reducer 3 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_212] + FORWARD [RS_215] PartitionCols:_col1, _col0 - Select Operator [SEL_211] (rows=174238687 width=88) + Select Operator [SEL_214] (rows=174238687 width=88) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_210] (rows=174238687 width=88) + Group By Operator [GBY_213] (rows=174238687 width=88) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1 <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col0, _col1 Group By Operator [GBY_17] (rows=348477374 width=88) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col4, _col3 - Merge Join Operator [MERGEJOIN_182] (rows=348477374 width=88) - Conds:RS_191._col0=RS_14._col0(Inner),Output:["_col3","_col4","_col6","_col7","_col8"] + Merge Join Operator [MERGEJOIN_185] (rows=348477374 width=88) + Conds:RS_194._col0=RS_14._col0(Inner),Output:["_col3","_col4","_col6","_col7","_col8"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_191] + SHUFFLE [RS_194] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_190] + Please refer to the previous Select Operator [SEL_193] <-Reducer 15 [SIMPLE_EDGE] SHUFFLE [RS_14] PartitionCols:_col0 @@ -243,53 +243,53 @@ Stage-0 
Output:["_col0","_col1","_col2","_col4","_col5","_col6"] Filter Operator [FIL_11] (rows=316797606 width=88) predicate:_col8 is null - Merge Join Operator [MERGEJOIN_181] (rows=633595212 width=88) - Conds:RS_207._col1, _col3=RS_209._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5","_col6","_col8"] + Merge Join Operator [MERGEJOIN_184] (rows=633595212 width=88) + Conds:RS_210._col1, _col3=RS_212._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5","_col6","_col8"] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_207] + SHUFFLE [RS_210] PartitionCols:_col1, _col3 - Select Operator [SEL_206] (rows=575995635 width=88) + Select Operator [SEL_209] (rows=575995635 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_205] (rows=575995635 width=88) + Filter Operator [FIL_208] (rows=575995635 width=88) predicate:((ss_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) and ss_sold_date_sk is not null) TableScan [TS_3] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_ticket_number","ss_quantity","ss_wholesale_cost","ss_sales_price"] <-Reducer 7 [BROADCAST_EDGE] vectorized - BROADCAST [RS_204] - Group By Operator [GBY_203] (rows=1 width=12) + BROADCAST [RS_207] + Group By Operator [GBY_206] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_200] - Group By Operator [GBY_197] (rows=1 width=12) + SHUFFLE [RS_203] + Group By Operator [GBY_200] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator 
[SEL_192] (rows=36524 width=1119) + Select Operator [SEL_195] (rows=36524 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_190] + Please refer to the previous Select Operator [SEL_193] <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_209] + SHUFFLE [RS_212] PartitionCols:_col0, _col1 - Select Operator [SEL_208] (rows=57591150 width=77) + Select Operator [SEL_211] (rows=57591150 width=77) Output:["_col0","_col1"] TableScan [TS_6] (rows=57591150 width=77) default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_item_sk","sr_ticket_number"] <-Reducer 9 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_222] + FORWARD [RS_225] PartitionCols:_col1, _col0 - Select Operator [SEL_221] (rows=43560808 width=135) + Select Operator [SEL_224] (rows=43560808 width=135) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_220] (rows=43560808 width=135) + Group By Operator [GBY_223] (rows=43560808 width=135) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1 <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_39] PartitionCols:_col0, _col1 Group By Operator [GBY_38] (rows=87121617 width=135) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col4, _col3 - Merge Join Operator [MERGEJOIN_184] (rows=87121617 width=135) - Conds:RS_193._col0=RS_35._col0(Inner),Output:["_col3","_col4","_col6","_col7","_col8"] + Merge Join Operator [MERGEJOIN_187] (rows=87121617 width=135) + Conds:RS_196._col0=RS_35._col0(Inner),Output:["_col3","_col4","_col6","_col7","_col8"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_193] + SHUFFLE [RS_196] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_190] + Please refer to the previous Select Operator [SEL_193] <-Reducer 18 [SIMPLE_EDGE] SHUFFLE [RS_35] PartitionCols:_col0 @@ -297,32 +297,32 @@ Stage-0 
Output:["_col0","_col1","_col2","_col4","_col5","_col6"] Filter Operator [FIL_32] (rows=79201469 width=135) predicate:_col8 is null - Merge Join Operator [MERGEJOIN_183] (rows=158402938 width=135) - Conds:RS_217._col1, _col3=RS_219._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5","_col6","_col8"] + Merge Join Operator [MERGEJOIN_186] (rows=158402938 width=135) + Conds:RS_220._col1, _col3=RS_222._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5","_col6","_col8"] <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_217] + SHUFFLE [RS_220] PartitionCols:_col1, _col3 - Select Operator [SEL_216] (rows=144002668 width=135) + Select Operator [SEL_219] (rows=144002668 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_215] (rows=144002668 width=135) + Filter Operator [FIL_218] (rows=144002668 width=135) predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_34_date_dim_d_date_sk_min) AND DynamicValue(RS_34_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_34_date_dim_d_date_sk_bloom_filter))) and ws_sold_date_sk is not null) TableScan [TS_24] (rows=144002668 width=135) default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk","ws_order_number","ws_quantity","ws_wholesale_cost","ws_sales_price"] <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_214] - Group By Operator [GBY_213] (rows=1 width=12) + BROADCAST [RS_217] + Group By Operator [GBY_216] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_201] - Group By Operator [GBY_198] (rows=1 width=12) + SHUFFLE [RS_204] + Group By Operator [GBY_201] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select 
Operator [SEL_194] (rows=36524 width=1119) + Select Operator [SEL_197] (rows=36524 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_190] + Please refer to the previous Select Operator [SEL_193] <-Map 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_219] + SHUFFLE [RS_222] PartitionCols:_col0, _col1 - Select Operator [SEL_218] (rows=14398467 width=92) + Select Operator [SEL_221] (rows=14398467 width=92) Output:["_col0","_col1"] TableScan [TS_27] (rows=14398467 width=92) default@web_returns,web_returns,Tbl:COMPLETE,Col:NONE,Output:["wr_item_sk","wr_order_number"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query80.q.out b/ql/src/test/results/clientpositive/perf/tez/query80.q.out index 816b525c30..ee8cdd82a3 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query80.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query80.q.out @@ -232,26 +232,26 @@ Stage-0 limit:100 Stage-1 Reducer 10 vectorized - File Output Operator [FS_457] - Limit [LIM_456] (rows=100 width=108) + File Output Operator [FS_460] + Limit [LIM_459] (rows=100 width=108) Number of rows:100 - Select Operator [SEL_455] (rows=1217531358 width=108) + Select Operator [SEL_458] (rows=1217531358 width=108) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_454] - Select Operator [SEL_453] (rows=1217531358 width=108) + SHUFFLE [RS_457] + Select Operator [SEL_456] (rows=1217531358 width=108) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_452] (rows=1217531358 width=108) + Group By Operator [GBY_455] (rows=1217531358 width=108) Output:["_col0","_col1","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Union 8 [SIMPLE_EDGE] <-Reducer 18 [CONTAINS] vectorized - Reduce Output Operator [RS_481] + Reduce Output Operator [RS_484] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_480] (rows=2435062716 
width=108) + Group By Operator [GBY_483] (rows=2435062716 width=108) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Select Operator [SEL_479] (rows=231905279 width=135) + Select Operator [SEL_482] (rows=231905279 width=135) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_478] (rows=231905279 width=135) + Group By Operator [GBY_481] (rows=231905279 width=135) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0 <-Reducer 17 [SIMPLE_EDGE] SHUFFLE [RS_75] @@ -260,134 +260,134 @@ Stage-0 Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col1)","sum(_col2)","sum(_col3)"],keys:_col0 Select Operator [SEL_72] (rows=463810558 width=135) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_365] (rows=463810558 width=135) - Conds:RS_69._col1=RS_466._col0(Inner),Output:["_col5","_col6","_col9","_col10","_col18"] + Merge Join Operator [MERGEJOIN_368] (rows=463810558 width=135) + Conds:RS_69._col1=RS_469._col0(Inner),Output:["_col5","_col6","_col9","_col10","_col18"] <-Map 39 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_466] + PARTITION_ONLY_SHUFFLE [RS_469] PartitionCols:_col0 - Select Operator [SEL_465] (rows=46000 width=460) + Select Operator [SEL_468] (rows=46000 width=460) Output:["_col0","_col1"] - Filter Operator [FIL_464] (rows=46000 width=460) + Filter Operator [FIL_467] (rows=46000 width=460) predicate:cp_catalog_page_sk is not null TableScan [TS_54] (rows=46000 width=460) default@catalog_page,catalog_page,Tbl:COMPLETE,Col:NONE,Output:["cp_catalog_page_sk","cp_catalog_page_id"] <-Reducer 16 [SIMPLE_EDGE] SHUFFLE [RS_69] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_364] (rows=421645953 width=135) - Conds:RS_66._col3=RS_422._col0(Inner),Output:["_col1","_col5","_col6","_col9","_col10"] + Merge Join Operator [MERGEJOIN_367] 
(rows=421645953 width=135) + Conds:RS_66._col3=RS_425._col0(Inner),Output:["_col1","_col5","_col6","_col9","_col10"] <-Map 30 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_422] + SHUFFLE [RS_425] PartitionCols:_col0 - Select Operator [SEL_419] (rows=1150 width=1179) + Select Operator [SEL_422] (rows=1150 width=1179) Output:["_col0"] - Filter Operator [FIL_418] (rows=1150 width=1179) + Filter Operator [FIL_421] (rows=1150 width=1179) predicate:((p_channel_tv = 'N') and p_promo_sk is not null) TableScan [TS_12] (rows=2300 width=1179) default@promotion,promotion,Tbl:COMPLETE,Col:NONE,Output:["p_promo_sk","p_channel_tv"] <-Reducer 15 [SIMPLE_EDGE] SHUFFLE [RS_66] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_363] (rows=383314495 width=135) - Conds:RS_63._col2=RS_406._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col9","_col10"] + Merge Join Operator [MERGEJOIN_366] (rows=383314495 width=135) + Conds:RS_63._col2=RS_409._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col9","_col10"] <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_406] + SHUFFLE [RS_409] PartitionCols:_col0 - Select Operator [SEL_403] (rows=154000 width=1436) + Select Operator [SEL_406] (rows=154000 width=1436) Output:["_col0"] - Filter Operator [FIL_402] (rows=154000 width=1436) + Filter Operator [FIL_405] (rows=154000 width=1436) predicate:((i_current_price > 50) and i_item_sk is not null) TableScan [TS_9] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_current_price"] <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_63] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_362] (rows=348467716 width=135) - Conds:RS_60._col0=RS_390._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col9","_col10"] + Merge Join Operator [MERGEJOIN_365] (rows=348467716 width=135) + Conds:RS_60._col0=RS_393._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col9","_col10"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_390] + SHUFFLE [RS_393] 
PartitionCols:_col0 - Select Operator [SEL_387] (rows=8116 width=1119) + Select Operator [SEL_390] (rows=8116 width=1119) Output:["_col0"] - Filter Operator [FIL_386] (rows=8116 width=1119) + Filter Operator [FIL_389] (rows=8116 width=1119) predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-08-04 00:00:00' AND TIMESTAMP'1998-09-03 00:00:00' and d_date_sk is not null) TableScan [TS_6] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] <-Reducer 37 [SIMPLE_EDGE] SHUFFLE [RS_60] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_361] (rows=316788826 width=135) - Conds:RS_474._col2, _col4=RS_477._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col9","_col10"] + Merge Join Operator [MERGEJOIN_364] (rows=316788826 width=135) + Conds:RS_477._col2, _col4=RS_480._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col9","_col10"] <-Map 36 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_474] + SHUFFLE [RS_477] PartitionCols:_col2, _col4 - Select Operator [SEL_473] (rows=287989836 width=135) + Select Operator [SEL_476] (rows=287989836 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_472] (rows=287989836 width=135) + Filter Operator [FIL_475] (rows=287989836 width=135) predicate:((cs_catalog_page_sk BETWEEN DynamicValue(RS_70_catalog_page_cp_catalog_page_sk_min) AND DynamicValue(RS_70_catalog_page_cp_catalog_page_sk_max) and in_bloom_filter(cs_catalog_page_sk, DynamicValue(RS_70_catalog_page_cp_catalog_page_sk_bloom_filter))) and (cs_item_sk BETWEEN DynamicValue(RS_64_item_i_item_sk_min) AND DynamicValue(RS_64_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_64_item_i_item_sk_bloom_filter))) and (cs_promo_sk BETWEEN DynamicValue(RS_67_promotion_p_promo_sk_min) AND DynamicValue(RS_67_promotion_p_promo_sk_max) and in_bloom_filter(cs_promo_sk, 
DynamicValue(RS_67_promotion_p_promo_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_61_date_dim_d_date_sk_min) AND DynamicValue(RS_61_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_61_date_dim_d_date_sk_bloom_filter))) and cs_catalog_page_sk is not null and cs_item_sk is not null and cs_promo_sk is not null and cs_sold_date_sk is not null) TableScan [TS_39] (rows=287989836 width=135) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_catalog_page_sk","cs_item_sk","cs_promo_sk","cs_order_number","cs_ext_sales_price","cs_net_profit"] <-Reducer 19 [BROADCAST_EDGE] vectorized - BROADCAST [RS_459] - Group By Operator [GBY_458] (rows=1 width=12) + BROADCAST [RS_462] + Group By Operator [GBY_461] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_398] - Group By Operator [GBY_395] (rows=1 width=12) + SHUFFLE [RS_401] + Group By Operator [GBY_398] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_391] (rows=8116 width=1119) + Select Operator [SEL_394] (rows=8116 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_387] + Please refer to the previous Select Operator [SEL_390] <-Reducer 28 [BROADCAST_EDGE] vectorized - BROADCAST [RS_461] - Group By Operator [GBY_460] (rows=1 width=12) + BROADCAST [RS_464] + Group By Operator [GBY_463] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 26 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_414] - Group By Operator [GBY_411] (rows=1 width=12) + SHUFFLE [RS_417] + Group By Operator [GBY_414] (rows=1 width=12) 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_407] (rows=154000 width=1436) + Select Operator [SEL_410] (rows=154000 width=1436) Output:["_col0"] - Please refer to the previous Select Operator [SEL_403] + Please refer to the previous Select Operator [SEL_406] <-Reducer 32 [BROADCAST_EDGE] vectorized - BROADCAST [RS_463] - Group By Operator [GBY_462] (rows=1 width=12) + BROADCAST [RS_466] + Group By Operator [GBY_465] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 30 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_430] - Group By Operator [GBY_427] (rows=1 width=12) + SHUFFLE [RS_433] + Group By Operator [GBY_430] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_423] (rows=1150 width=1179) + Select Operator [SEL_426] (rows=1150 width=1179) Output:["_col0"] - Please refer to the previous Select Operator [SEL_419] + Please refer to the previous Select Operator [SEL_422] <-Reducer 40 [BROADCAST_EDGE] vectorized - BROADCAST [RS_471] - Group By Operator [GBY_470] (rows=1 width=12) + BROADCAST [RS_474] + Group By Operator [GBY_473] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 39 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_469] - Group By Operator [GBY_468] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_472] + Group By Operator [GBY_471] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_467] (rows=46000 width=460) + Select Operator [SEL_470] (rows=46000 width=460) Output:["_col0"] - Please refer to the previous 
Select Operator [SEL_465] + Please refer to the previous Select Operator [SEL_468] <-Map 38 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_477] + SHUFFLE [RS_480] PartitionCols:_col0, _col1 - Select Operator [SEL_476] (rows=28798881 width=106) + Select Operator [SEL_479] (rows=28798881 width=106) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_475] (rows=28798881 width=106) + Filter Operator [FIL_478] (rows=28798881 width=106) predicate:cr_item_sk is not null TableScan [TS_42] (rows=28798881 width=106) default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_item_sk","cr_order_number","cr_return_amount","cr_net_loss"] <-Reducer 24 [CONTAINS] vectorized - Reduce Output Operator [RS_505] + Reduce Output Operator [RS_508] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_504] (rows=2435062716 width=108) + Group By Operator [GBY_507] (rows=2435062716 width=108) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Select Operator [SEL_503] (rows=115958879 width=135) + Select Operator [SEL_506] (rows=115958879 width=135) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_502] (rows=115958879 width=135) + Group By Operator [GBY_505] (rows=115958879 width=135) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0 <-Reducer 23 [SIMPLE_EDGE] SHUFFLE [RS_115] @@ -396,119 +396,119 @@ Stage-0 Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col1)","sum(_col2)","sum(_col3)"],keys:_col0 Select Operator [SEL_112] (rows=231917759 width=135) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_370] (rows=231917759 width=135) - Conds:RS_109._col2=RS_490._col0(Inner),Output:["_col5","_col6","_col9","_col10","_col18"] + Merge Join Operator [MERGEJOIN_373] (rows=231917759 width=135) + 
Conds:RS_109._col2=RS_493._col0(Inner),Output:["_col5","_col6","_col9","_col10","_col18"] <-Map 44 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_490] + PARTITION_ONLY_SHUFFLE [RS_493] PartitionCols:_col0 - Select Operator [SEL_489] (rows=84 width=1850) + Select Operator [SEL_492] (rows=84 width=1850) Output:["_col0","_col1"] - Filter Operator [FIL_488] (rows=84 width=1850) + Filter Operator [FIL_491] (rows=84 width=1850) predicate:web_site_sk is not null TableScan [TS_94] (rows=84 width=1850) default@web_site,web_site,Tbl:COMPLETE,Col:NONE,Output:["web_site_sk","web_site_id"] <-Reducer 22 [SIMPLE_EDGE] SHUFFLE [RS_109] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_369] (rows=210834322 width=135) - Conds:RS_106._col3=RS_424._col0(Inner),Output:["_col2","_col5","_col6","_col9","_col10"] + Merge Join Operator [MERGEJOIN_372] (rows=210834322 width=135) + Conds:RS_106._col3=RS_427._col0(Inner),Output:["_col2","_col5","_col6","_col9","_col10"] <-Map 30 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_424] + SHUFFLE [RS_427] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_419] + Please refer to the previous Select Operator [SEL_422] <-Reducer 21 [SIMPLE_EDGE] SHUFFLE [RS_106] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_368] (rows=191667562 width=135) - Conds:RS_103._col1=RS_408._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col9","_col10"] + Merge Join Operator [MERGEJOIN_371] (rows=191667562 width=135) + Conds:RS_103._col1=RS_411._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col9","_col10"] <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_408] + SHUFFLE [RS_411] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_403] + Please refer to the previous Select Operator [SEL_406] <-Reducer 20 [SIMPLE_EDGE] SHUFFLE [RS_103] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_367] (rows=174243235 width=135) - 
Conds:RS_100._col0=RS_392._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col9","_col10"] + Merge Join Operator [MERGEJOIN_370] (rows=174243235 width=135) + Conds:RS_100._col0=RS_395._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col9","_col10"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_392] + SHUFFLE [RS_395] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_387] + Please refer to the previous Select Operator [SEL_390] <-Reducer 42 [SIMPLE_EDGE] SHUFFLE [RS_100] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_366] (rows=158402938 width=135) - Conds:RS_498._col1, _col4=RS_501._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col9","_col10"] + Merge Join Operator [MERGEJOIN_369] (rows=158402938 width=135) + Conds:RS_501._col1, _col4=RS_504._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col9","_col10"] <-Map 41 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_498] + SHUFFLE [RS_501] PartitionCols:_col1, _col4 - Select Operator [SEL_497] (rows=144002668 width=135) + Select Operator [SEL_500] (rows=144002668 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_496] (rows=144002668 width=135) + Filter Operator [FIL_499] (rows=144002668 width=135) predicate:((ws_item_sk BETWEEN DynamicValue(RS_104_item_i_item_sk_min) AND DynamicValue(RS_104_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_104_item_i_item_sk_bloom_filter))) and (ws_promo_sk BETWEEN DynamicValue(RS_107_promotion_p_promo_sk_min) AND DynamicValue(RS_107_promotion_p_promo_sk_max) and in_bloom_filter(ws_promo_sk, DynamicValue(RS_107_promotion_p_promo_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_101_date_dim_d_date_sk_min) AND DynamicValue(RS_101_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_101_date_dim_d_date_sk_bloom_filter))) and (ws_web_site_sk BETWEEN 
DynamicValue(RS_110_web_site_web_site_sk_min) AND DynamicValue(RS_110_web_site_web_site_sk_max) and in_bloom_filter(ws_web_site_sk, DynamicValue(RS_110_web_site_web_site_sk_bloom_filter))) and ws_item_sk is not null and ws_promo_sk is not null and ws_sold_date_sk is not null and ws_web_site_sk is not null) TableScan [TS_79] (rows=144002668 width=135) default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_web_site_sk","ws_promo_sk","ws_order_number","ws_ext_sales_price","ws_net_profit"] <-Reducer 25 [BROADCAST_EDGE] vectorized - BROADCAST [RS_483] - Group By Operator [GBY_482] (rows=1 width=12) + BROADCAST [RS_486] + Group By Operator [GBY_485] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_399] - Group By Operator [GBY_396] (rows=1 width=12) + SHUFFLE [RS_402] + Group By Operator [GBY_399] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_393] (rows=8116 width=1119) + Select Operator [SEL_396] (rows=8116 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_387] + Please refer to the previous Select Operator [SEL_390] <-Reducer 29 [BROADCAST_EDGE] vectorized - BROADCAST [RS_485] - Group By Operator [GBY_484] (rows=1 width=12) + BROADCAST [RS_488] + Group By Operator [GBY_487] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 26 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_415] - Group By Operator [GBY_412] (rows=1 width=12) + SHUFFLE [RS_418] + Group By Operator [GBY_415] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select 
Operator [SEL_409] (rows=154000 width=1436) + Select Operator [SEL_412] (rows=154000 width=1436) Output:["_col0"] - Please refer to the previous Select Operator [SEL_403] + Please refer to the previous Select Operator [SEL_406] <-Reducer 33 [BROADCAST_EDGE] vectorized - BROADCAST [RS_487] - Group By Operator [GBY_486] (rows=1 width=12) + BROADCAST [RS_490] + Group By Operator [GBY_489] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 30 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_431] - Group By Operator [GBY_428] (rows=1 width=12) + SHUFFLE [RS_434] + Group By Operator [GBY_431] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_425] (rows=1150 width=1179) + Select Operator [SEL_428] (rows=1150 width=1179) Output:["_col0"] - Please refer to the previous Select Operator [SEL_419] + Please refer to the previous Select Operator [SEL_422] <-Reducer 45 [BROADCAST_EDGE] vectorized - BROADCAST [RS_495] - Group By Operator [GBY_494] (rows=1 width=12) + BROADCAST [RS_498] + Group By Operator [GBY_497] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 44 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_493] - Group By Operator [GBY_492] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_496] + Group By Operator [GBY_495] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_491] (rows=84 width=1850) + Select Operator [SEL_494] (rows=84 width=1850) Output:["_col0"] - Please refer to the previous Select Operator [SEL_489] + Please refer to the previous Select Operator [SEL_492] <-Map 43 [SIMPLE_EDGE] vectorized - SHUFFLE 
[RS_501] + SHUFFLE [RS_504] PartitionCols:_col0, _col1 - Select Operator [SEL_500] (rows=14398467 width=92) + Select Operator [SEL_503] (rows=14398467 width=92) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_499] (rows=14398467 width=92) + Filter Operator [FIL_502] (rows=14398467 width=92) predicate:wr_item_sk is not null TableScan [TS_82] (rows=14398467 width=92) default@web_returns,web_returns,Tbl:COMPLETE,Col:NONE,Output:["wr_item_sk","wr_order_number","wr_return_amt","wr_net_loss"] <-Reducer 7 [CONTAINS] vectorized - Reduce Output Operator [RS_451] + Reduce Output Operator [RS_454] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_450] (rows=2435062716 width=108) + Group By Operator [GBY_453] (rows=2435062716 width=108) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Select Operator [SEL_449] (rows=463823414 width=88) + Select Operator [SEL_452] (rows=463823414 width=88) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_448] (rows=463823414 width=88) + Group By Operator [GBY_451] (rows=463823414 width=88) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0 <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_36] @@ -517,108 +517,108 @@ Stage-0 Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col1)","sum(_col2)","sum(_col3)"],keys:_col0 Select Operator [SEL_33] (rows=927646829 width=88) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_360] (rows=927646829 width=88) - Conds:RS_30._col2=RS_436._col0(Inner),Output:["_col5","_col6","_col9","_col10","_col18"] + Merge Join Operator [MERGEJOIN_363] (rows=927646829 width=88) + Conds:RS_30._col2=RS_439._col0(Inner),Output:["_col5","_col6","_col9","_col10","_col18"] <-Map 34 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_436] + SHUFFLE [RS_439] PartitionCols:_col0 - Select Operator [SEL_435] 
(rows=1704 width=1910) + Select Operator [SEL_438] (rows=1704 width=1910) Output:["_col0","_col1"] - Filter Operator [FIL_434] (rows=1704 width=1910) + Filter Operator [FIL_437] (rows=1704 width=1910) predicate:s_store_sk is not null TableScan [TS_15] (rows=1704 width=1910) default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_id"] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_30] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_359] (rows=843315281 width=88) - Conds:RS_27._col3=RS_420._col0(Inner),Output:["_col2","_col5","_col6","_col9","_col10"] + Merge Join Operator [MERGEJOIN_362] (rows=843315281 width=88) + Conds:RS_27._col3=RS_423._col0(Inner),Output:["_col2","_col5","_col6","_col9","_col10"] <-Map 30 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_420] + SHUFFLE [RS_423] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_419] + Please refer to the previous Select Operator [SEL_422] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_27] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_358] (rows=766650239 width=88) - Conds:RS_24._col1=RS_404._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col9","_col10"] + Merge Join Operator [MERGEJOIN_361] (rows=766650239 width=88) + Conds:RS_24._col1=RS_407._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col9","_col10"] <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_404] + SHUFFLE [RS_407] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_403] + Please refer to the previous Select Operator [SEL_406] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_357] (rows=696954748 width=88) - Conds:RS_21._col0=RS_388._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col9","_col10"] + Merge Join Operator [MERGEJOIN_360] (rows=696954748 width=88) + Conds:RS_21._col0=RS_391._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col9","_col10"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_388] + SHUFFLE [RS_391] 
PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_387] + Please refer to the previous Select Operator [SEL_390] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_356] (rows=633595212 width=88) - Conds:RS_444._col1, _col4=RS_447._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col9","_col10"] + Merge Join Operator [MERGEJOIN_359] (rows=633595212 width=88) + Conds:RS_447._col1, _col4=RS_450._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col9","_col10"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_444] + SHUFFLE [RS_447] PartitionCols:_col1, _col4 - Select Operator [SEL_443] (rows=575995635 width=88) + Select Operator [SEL_446] (rows=575995635 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_442] (rows=575995635 width=88) + Filter Operator [FIL_445] (rows=575995635 width=88) predicate:((ss_item_sk BETWEEN DynamicValue(RS_25_item_i_item_sk_min) AND DynamicValue(RS_25_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_25_item_i_item_sk_bloom_filter))) and (ss_promo_sk BETWEEN DynamicValue(RS_28_promotion_p_promo_sk_min) AND DynamicValue(RS_28_promotion_p_promo_sk_max) and in_bloom_filter(ss_promo_sk, DynamicValue(RS_28_promotion_p_promo_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_22_date_dim_d_date_sk_min) AND DynamicValue(RS_22_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_22_date_dim_d_date_sk_bloom_filter))) and (ss_store_sk BETWEEN DynamicValue(RS_31_store_s_store_sk_min) AND DynamicValue(RS_31_store_s_store_sk_max) and in_bloom_filter(ss_store_sk, DynamicValue(RS_31_store_s_store_sk_bloom_filter))) and ss_item_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) TableScan [TS_0] (rows=575995635 width=88) 
default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_promo_sk","ss_ticket_number","ss_ext_sales_price","ss_net_profit"] <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_401] - Group By Operator [GBY_400] (rows=1 width=12) + BROADCAST [RS_404] + Group By Operator [GBY_403] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_397] - Group By Operator [GBY_394] (rows=1 width=12) + SHUFFLE [RS_400] + Group By Operator [GBY_397] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_389] (rows=8116 width=1119) + Select Operator [SEL_392] (rows=8116 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_387] + Please refer to the previous Select Operator [SEL_390] <-Reducer 27 [BROADCAST_EDGE] vectorized - BROADCAST [RS_417] - Group By Operator [GBY_416] (rows=1 width=12) + BROADCAST [RS_420] + Group By Operator [GBY_419] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 26 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_413] - Group By Operator [GBY_410] (rows=1 width=12) + SHUFFLE [RS_416] + Group By Operator [GBY_413] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_405] (rows=154000 width=1436) + Select Operator [SEL_408] (rows=154000 width=1436) Output:["_col0"] - Please refer to the previous Select Operator [SEL_403] + Please refer to the previous Select Operator [SEL_406] <-Reducer 31 [BROADCAST_EDGE] vectorized - BROADCAST [RS_433] - Group By Operator [GBY_432] (rows=1 width=12) + BROADCAST 
[RS_436] + Group By Operator [GBY_435] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 30 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_429] - Group By Operator [GBY_426] (rows=1 width=12) + SHUFFLE [RS_432] + Group By Operator [GBY_429] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_421] (rows=1150 width=1179) + Select Operator [SEL_424] (rows=1150 width=1179) Output:["_col0"] - Please refer to the previous Select Operator [SEL_419] + Please refer to the previous Select Operator [SEL_422] <-Reducer 35 [BROADCAST_EDGE] vectorized - BROADCAST [RS_441] - Group By Operator [GBY_440] (rows=1 width=12) + BROADCAST [RS_444] + Group By Operator [GBY_443] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 34 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_439] - Group By Operator [GBY_438] (rows=1 width=12) + SHUFFLE [RS_442] + Group By Operator [GBY_441] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_437] (rows=1704 width=1910) + Select Operator [SEL_440] (rows=1704 width=1910) Output:["_col0"] - Please refer to the previous Select Operator [SEL_435] + Please refer to the previous Select Operator [SEL_438] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_447] + SHUFFLE [RS_450] PartitionCols:_col0, _col1 - Select Operator [SEL_446] (rows=57591150 width=77) + Select Operator [SEL_449] (rows=57591150 width=77) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_445] (rows=57591150 width=77) + Filter Operator [FIL_448] (rows=57591150 width=77) predicate:sr_item_sk is not null TableScan [TS_3] (rows=57591150 width=77) 
default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_item_sk","sr_ticket_number","sr_return_amt","sr_net_loss"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query91.q.out b/ql/src/test/results/clientpositive/perf/tez/query91.q.out index 5e0f00a3e7..a53c7d796d 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query91.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query91.q.out @@ -75,109 +75,109 @@ Stage-0 limit:-1 Stage-1 Reducer 6 vectorized - File Output Operator [FS_168] - Select Operator [SEL_167] (rows=58564004 width=860) + File Output Operator [FS_170] + Select Operator [SEL_169] (rows=58564004 width=860) Output:["_col0","_col1","_col2","_col3"] <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_166] - Select Operator [SEL_165] (rows=58564004 width=860) + SHUFFLE [RS_168] + Select Operator [SEL_167] (rows=58564004 width=860) Output:["_col0","_col1","_col2","_col4"] - Group By Operator [GBY_164] (rows=58564004 width=860) + Group By Operator [GBY_166] (rows=58564004 width=860) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_42] PartitionCols:_col0, _col1, _col2, _col3, _col4 Group By Operator [GBY_41] (rows=117128008 width=860) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col3)"],keys:_col8, _col9, _col10, _col18, _col19 - Merge Join Operator [MERGEJOIN_142] (rows=117128008 width=860) + Merge Join Operator [MERGEJOIN_144] (rows=117128008 width=860) Conds:RS_37._col1=RS_38._col2(Inner),Output:["_col3","_col8","_col9","_col10","_col18","_col19"] <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_38] PartitionCols:_col2 Select Operator [SEL_30] (rows=106480005 width=860) Output:["_col2","_col7","_col8"] - Merge Join Operator [MERGEJOIN_141] (rows=106480005 width=860) - Conds:RS_27._col2=RS_163._col0(Inner),Output:["_col0","_col5","_col6"] + Merge Join Operator 
[MERGEJOIN_143] (rows=106480005 width=860) + Conds:RS_27._col2=RS_165._col0(Inner),Output:["_col0","_col5","_col6"] <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_163] + SHUFFLE [RS_165] PartitionCols:_col0 - Select Operator [SEL_162] (rows=3600 width=107) + Select Operator [SEL_164] (rows=3600 width=107) Output:["_col0"] - Filter Operator [FIL_161] (rows=3600 width=107) + Filter Operator [FIL_163] (rows=3600 width=107) predicate:((hd_buy_potential like '0-500%') and hd_demo_sk is not null) TableScan [TS_18] (rows=7200 width=107) default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_buy_potential"] <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_27] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_140] (rows=96800003 width=860) - Conds:RS_24._col3=RS_160._col0(Inner),Output:["_col0","_col2","_col5","_col6"] + Merge Join Operator [MERGEJOIN_142] (rows=96800003 width=860) + Conds:RS_24._col3=RS_162._col0(Inner),Output:["_col0","_col2","_col5","_col6"] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_160] + SHUFFLE [RS_162] PartitionCols:_col0 - Select Operator [SEL_159] (rows=20000000 width=1014) + Select Operator [SEL_161] (rows=20000000 width=1014) Output:["_col0"] - Filter Operator [FIL_158] (rows=20000000 width=1014) + Filter Operator [FIL_160] (rows=20000000 width=1014) predicate:((ca_gmt_offset = -7) and ca_address_sk is not null) TableScan [TS_15] (rows=40000000 width=1014) default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_gmt_offset"] <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_139] (rows=88000001 width=860) - Conds:RS_154._col1=RS_157._col0(Inner),Output:["_col0","_col2","_col3","_col5","_col6"] + Merge Join Operator [MERGEJOIN_141] (rows=88000001 width=860) + Conds:RS_156._col1=RS_159._col0(Inner),Output:["_col0","_col2","_col3","_col5","_col6"] <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_157] + SHUFFLE [RS_159] 
PartitionCols:_col0 - Select Operator [SEL_156] (rows=930900 width=385) + Select Operator [SEL_158] (rows=930900 width=385) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_155] (rows=930900 width=385) + Filter Operator [FIL_157] (rows=930900 width=385) predicate:((((cd_marital_status = 'M') and (cd_education_status = 'Unknown')) or ((cd_marital_status = 'W') and (cd_education_status = 'Advanced Degree'))) and ((cd_education_status = 'Unknown') or (cd_education_status = 'Advanced Degree')) and ((cd_marital_status = 'M') or (cd_marital_status = 'W')) and cd_demo_sk is not null) TableScan [TS_12] (rows=1861800 width=385) default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_154] + SHUFFLE [RS_156] PartitionCols:_col1 - Select Operator [SEL_153] (rows=80000000 width=860) + Select Operator [SEL_155] (rows=80000000 width=860) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_152] (rows=80000000 width=860) + Filter Operator [FIL_154] (rows=80000000 width=860) predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null and c_current_hdemo_sk is not null and c_customer_sk is not null) TableScan [TS_9] (rows=80000000 width=860) default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_hdemo_sk","c_current_addr_sk"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_37] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_138] (rows=34846646 width=106) - Conds:RS_34._col2=RS_151._col0(Inner),Output:["_col1","_col3","_col8","_col9","_col10"] + Merge Join Operator [MERGEJOIN_140] (rows=34846646 width=106) + Conds:RS_34._col2=RS_153._col0(Inner),Output:["_col1","_col3","_col8","_col9","_col10"] <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_151] + SHUFFLE [RS_153] PartitionCols:_col0 - Select Operator [SEL_150] (rows=60 width=2045) + Select Operator [SEL_152] (rows=60 width=2045) 
Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_149] (rows=60 width=2045) + Filter Operator [FIL_151] (rows=60 width=2045) predicate:cc_call_center_sk is not null TableScan [TS_6] (rows=60 width=2045) default@call_center,call_center,Tbl:COMPLETE,Col:NONE,Output:["cc_call_center_sk","cc_call_center_id","cc_name","cc_manager"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_34] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_137] (rows=31678769 width=106) - Conds:RS_145._col0=RS_148._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_139] (rows=31678769 width=106) + Conds:RS_147._col0=RS_150._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_145] + SHUFFLE [RS_147] PartitionCols:_col0 - Select Operator [SEL_144] (rows=28798881 width=106) + Select Operator [SEL_146] (rows=28798881 width=106) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_143] (rows=28798881 width=106) + Filter Operator [FIL_145] (rows=28798881 width=106) predicate:(cr_call_center_sk is not null and cr_returned_date_sk is not null and cr_returning_customer_sk is not null) TableScan [TS_0] (rows=28798881 width=106) default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:NONE,Output:["cr_returned_date_sk","cr_returning_customer_sk","cr_call_center_sk","cr_net_loss"] <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_148] + SHUFFLE [RS_150] PartitionCols:_col0 - Select Operator [SEL_147] (rows=18262 width=1119) + Select Operator [SEL_149] (rows=18262 width=1119) Output:["_col0"] - Filter Operator [FIL_146] (rows=18262 width=1119) + Filter Operator [FIL_148] (rows=18262 width=1119) predicate:((d_moy = 11) and (d_year = 1999) and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query92.q.out b/ql/src/test/results/clientpositive/perf/tez/query92.q.out index 
061fcf729d..4b4afa9a95 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query92.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query92.q.out @@ -59,34 +59,31 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 7 (BROADCAST_EDGE) -Map 12 <- Reducer 11 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE) -Reducer 10 <- Map 13 (SIMPLE_EDGE), Reducer 9 (ONE_TO_ONE_EDGE) -Reducer 11 <- Map 6 (CUSTOM_SIMPLE_EDGE) -Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) -Reducer 3 <- Reducer 10 (ONE_TO_ONE_EDGE), Reducer 2 (SIMPLE_EDGE) +Map 1 <- Reducer 11 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) -Reducer 8 <- Map 12 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) -Reducer 9 <- Reducer 8 (SIMPLE_EDGE) +Reducer 6 <- Reducer 2 (SIMPLE_EDGE) +Reducer 7 <- Map 10 (SIMPLE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) +Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 5 vectorized - File Output Operator [FS_142] - Limit [LIM_141] (rows=1 width=112) + File Output Operator [FS_135] + Limit [LIM_134] (rows=1 width=112) Number of rows:100 - Select Operator [SEL_140] (rows=1 width=112) + Select Operator [SEL_133] (rows=1 width=112) Output:["_col0"] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_139] - Select Operator [SEL_138] (rows=1 width=112) + SHUFFLE [RS_132] + Select Operator [SEL_131] (rows=1 width=112) Output:["_col1"] - Group By Operator [GBY_137] (rows=1 width=112) + Group By Operator [GBY_130] (rows=1 width=112) Output:["_col0"],aggregations:["sum(VALUE._col0)"] <-Reducer 3 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_36] @@ -96,103 +93,78 @@ Stage-0 Output:["_col2"] 
Filter Operator [FIL_33] (rows=58081078 width=135) predicate:(_col2 > CAST( (1.3 * _col6) AS decimal(14,7))) - Merge Join Operator [MERGEJOIN_105] (rows=174243235 width=135) + Merge Join Operator [MERGEJOIN_107] (rows=174243235 width=135) Conds:RS_30._col1=RS_31._col2(Inner),Output:["_col2","_col6"] - <-Reducer 10 [ONE_TO_ONE_EDGE] - FORWARD [RS_31] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_104] (rows=87121617 width=135) - Conds:RS_136._col0=RS_125._col0(Inner),Output:["_col1","_col2"] - <-Map 13 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_125] - PartitionCols:_col0 - Select Operator [SEL_124] (rows=231000 width=1436) - Output:["_col0"] - Filter Operator [FIL_123] (rows=231000 width=1436) - predicate:((i_manufact_id = 269) and i_item_sk is not null) - TableScan [TS_20] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_manufact_id"] - <-Reducer 9 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_136] - PartitionCols:_col0 - Select Operator [SEL_135] (rows=79201469 width=135) - Output:["_col0","_col1"] - Group By Operator [GBY_134] (rows=79201469 width=135) - Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_17] - PartitionCols:_col0 - Group By Operator [GBY_16] (rows=158402938 width=135) - Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","count(_col2)"],keys:_col1 - Merge Join Operator [MERGEJOIN_103] (rows=158402938 width=135) - Conds:RS_133._col0=RS_110._col0(Inner),Output:["_col1","_col2"] - <-Map 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_110] - PartitionCols:_col0 - Select Operator [SEL_107] (rows=8116 width=1119) - Output:["_col0"] - Filter Operator [FIL_106] (rows=8116 width=1119) - predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-18 00:00:00' AND TIMESTAMP'1998-06-16 00:00:00' and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - 
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_133] - PartitionCols:_col0 - Select Operator [SEL_132] (rows=144002668 width=135) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_131] (rows=144002668 width=135) - predicate:((ws_item_sk BETWEEN DynamicValue(RS_24_item_i_item_sk_min) AND DynamicValue(RS_24_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_24_item_i_item_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) and ws_item_sk is not null and ws_sold_date_sk is not null) - TableScan [TS_6] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_ext_discount_amt"] - <-Reducer 11 [BROADCAST_EDGE] vectorized - BROADCAST [RS_122] - Group By Operator [GBY_121] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_115] - Group By Operator [GBY_113] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_111] (rows=8116 width=1119) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_107] - <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_130] - Group By Operator [GBY_129] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_128] - Group By Operator [GBY_127] (rows=1 width=12) - 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_126] (rows=231000 width=1436) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_124] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_30] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_102] (rows=158402938 width=135) - Conds:RS_120._col0=RS_108._col0(Inner),Output:["_col1","_col2"] - <-Map 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_108] + Merge Join Operator [MERGEJOIN_104] (rows=158402938 width=135) + Conds:RS_126._col0=RS_110._col0(Inner),Output:["_col1","_col2"] + <-Map 8 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_110] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_107] + Select Operator [SEL_109] (rows=8116 width=1119) + Output:["_col0"] + Filter Operator [FIL_108] (rows=8116 width=1119) + predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-18 00:00:00' AND TIMESTAMP'1998-06-16 00:00:00' and d_date_sk is not null) + TableScan [TS_3] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_120] + SHUFFLE [RS_126] PartitionCols:_col0 - Select Operator [SEL_119] (rows=144002668 width=135) + Select Operator [SEL_125] (rows=144002668 width=135) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_118] (rows=144002668 width=135) - predicate:((ws_sold_date_sk BETWEEN DynamicValue(RS_28_date_dim_d_date_sk_min) AND DynamicValue(RS_28_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_28_date_dim_d_date_sk_bloom_filter))) and ws_item_sk is not null and ws_sold_date_sk is not null) + Filter Operator [FIL_124] (rows=144002668 width=135) + predicate:((ws_item_sk BETWEEN DynamicValue(RS_24_item_i_item_sk_min) AND DynamicValue(RS_24_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_24_item_i_item_sk_bloom_filter))) and (ws_sold_date_sk 
BETWEEN DynamicValue(RS_28_date_dim_d_date_sk_min) AND DynamicValue(RS_28_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_28_date_dim_d_date_sk_bloom_filter))) and ws_item_sk is not null and ws_sold_date_sk is not null) TableScan [TS_0] (rows=144002668 width=135) default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_ext_discount_amt"] - <-Reducer 7 [BROADCAST_EDGE] vectorized - BROADCAST [RS_117] - Group By Operator [GBY_116] (rows=1 width=12) + <-Reducer 11 [BROADCAST_EDGE] vectorized + BROADCAST [RS_123] + Group By Operator [GBY_122] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_114] + <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized + SHUFFLE [RS_121] + Group By Operator [GBY_120] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_119] (rows=231000 width=1436) + Output:["_col0"] + Select Operator [SEL_117] (rows=231000 width=1436) + Output:["_col0"] + Filter Operator [FIL_116] (rows=231000 width=1436) + predicate:((i_manufact_id = 269) and i_item_sk is not null) + TableScan [TS_20] (rows=462000 width=1436) + default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_manufact_id"] + <-Reducer 9 [BROADCAST_EDGE] vectorized + BROADCAST [RS_115] + Group By Operator [GBY_114] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_113] Group By Operator [GBY_112] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_109] (rows=8116 width=1119) + Select 
Operator [SEL_111] (rows=8116 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_107] + Please refer to the previous Select Operator [SEL_109] + <-Reducer 7 [ONE_TO_ONE_EDGE] + FORWARD [RS_31] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_106] (rows=87121617 width=135) + Conds:RS_129._col0=RS_118._col0(Inner),Output:["_col1","_col2"] + <-Map 10 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_118] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_117] + <-Reducer 6 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_129] + PartitionCols:_col0 + Select Operator [SEL_128] (rows=79201469 width=135) + Output:["_col0","_col1"] + Group By Operator [GBY_127] (rows=79201469 width=135) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_17] + PartitionCols:_col0 + Group By Operator [GBY_16] (rows=158402938 width=135) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","count(_col2)"],keys:_col1 + Please refer to the previous Merge Join Operator [MERGEJOIN_104] diff --git a/ql/src/test/results/clientpositive/perf/tez/query94.q.out b/ql/src/test/results/clientpositive/perf/tez/query94.q.out index 5d19a1634b..396be11245 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query94.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query94.q.out @@ -76,22 +76,22 @@ Stage-0 limit:-1 Stage-1 Reducer 9 vectorized - File Output Operator [FS_174] - Limit [LIM_173] (rows=1 width=344) + File Output Operator [FS_176] + Limit [LIM_175] (rows=1 width=344) Number of rows:100 - Select Operator [SEL_172] (rows=1 width=344) + Select Operator [SEL_174] (rows=1 width=344) Output:["_col0","_col1","_col2"] <-Reducer 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_171] - Select Operator [SEL_170] (rows=1 width=344) + SHUFFLE [RS_173] + Select Operator [SEL_172] (rows=1 width=344) Output:["_col1","_col2","_col3"] - Group By Operator [GBY_169] (rows=1 
width=344) + Group By Operator [GBY_171] (rows=1 width=344) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] <-Reducer 7 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_168] - Group By Operator [GBY_167] (rows=1 width=344) + PARTITION_ONLY_SHUFFLE [RS_170] + Group By Operator [GBY_169] (rows=1 width=344) Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"] - Group By Operator [GBY_166] (rows=115958879 width=135) + Group By Operator [GBY_168] (rows=115958879 width=135) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_74] @@ -102,21 +102,21 @@ Stage-0 Output:["_col4","_col5","_col6"] Filter Operator [FIL_41] (rows=115958879 width=135) predicate:_col14 is null - Merge Join Operator [MERGEJOIN_128] (rows=231917759 width=135) - Conds:RS_38._col4=RS_165._col0(Left Outer),Output:["_col4","_col5","_col6","_col14"] + Merge Join Operator [MERGEJOIN_130] (rows=231917759 width=135) + Conds:RS_38._col4=RS_167._col0(Left Outer),Output:["_col4","_col5","_col6","_col14"] <-Reducer 18 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_165] + FORWARD [RS_167] PartitionCols:_col0 - Select Operator [SEL_164] (rows=7199233 width=92) + Select Operator [SEL_166] (rows=7199233 width=92) Output:["_col0","_col1"] - Group By Operator [GBY_163] (rows=7199233 width=92) + Group By Operator [GBY_165] (rows=7199233 width=92) Output:["_col0"],keys:KEY._col0 <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_162] + SHUFFLE [RS_164] PartitionCols:_col0 - Group By Operator [GBY_161] (rows=14398467 width=92) + Group By Operator [GBY_163] (rows=14398467 width=92) Output:["_col0"],keys:wr_order_number - Filter Operator [FIL_160] (rows=14398467 width=92) + Filter Operator [FIL_162] (rows=14398467 width=92) predicate:wr_order_number is not null TableScan [TS_25] (rows=14398467 width=92) 
default@web_returns,wr1,Tbl:COMPLETE,Col:NONE,Output:["wr_order_number"] @@ -125,101 +125,101 @@ Stage-0 PartitionCols:_col4 Select Operator [SEL_37] (rows=210834322 width=135) Output:["_col4","_col5","_col6"] - Merge Join Operator [MERGEJOIN_127] (rows=210834322 width=135) - Conds:RS_34._col4=RS_159._col0(Left Semi),Output:["_col3","_col4","_col5","_col6","_col14"],residual filter predicates:{(_col3 <> _col14)} + Merge Join Operator [MERGEJOIN_129] (rows=210834322 width=135) + Conds:RS_34._col4=RS_161._col0(Left Semi),Output:["_col3","_col4","_col5","_col6","_col14"],residual filter predicates:{(_col3 <> _col14)} <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_159] + SHUFFLE [RS_161] PartitionCols:_col0 - Group By Operator [GBY_158] (rows=144002668 width=135) + Group By Operator [GBY_160] (rows=144002668 width=135) Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_157] (rows=144002668 width=135) + Select Operator [SEL_159] (rows=144002668 width=135) Output:["_col0","_col1"] - Filter Operator [FIL_156] (rows=144002668 width=135) + Filter Operator [FIL_158] (rows=144002668 width=135) predicate:(ws_order_number is not null and ws_warehouse_sk is not null) TableScan [TS_22] (rows=144002668 width=135) default@web_sales,ws2,Tbl:COMPLETE,Col:NONE,Output:["ws_warehouse_sk","ws_order_number"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_34] PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_126] (rows=191667562 width=135) - Conds:RS_18._col2=RS_147._col0(Inner),Output:["_col3","_col4","_col5","_col6"] + Merge Join Operator [MERGEJOIN_128] (rows=191667562 width=135) + Conds:RS_18._col2=RS_149._col0(Inner),Output:["_col3","_col4","_col5","_col6"] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_147] + SHUFFLE [RS_149] PartitionCols:_col0 - Select Operator [SEL_146] (rows=42 width=1850) + Select Operator [SEL_148] (rows=42 width=1850) Output:["_col0"] - Filter Operator [FIL_145] (rows=42 width=1850) + Filter Operator [FIL_147] (rows=42 width=1850) 
predicate:((web_company_name = 'pri') and web_site_sk is not null) TableScan [TS_9] (rows=84 width=1850) default@web_site,web_site,Tbl:COMPLETE,Col:NONE,Output:["web_site_sk","web_company_name"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_125] (rows=174243235 width=135) - Conds:RS_15._col1=RS_139._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"] + Merge Join Operator [MERGEJOIN_127] (rows=174243235 width=135) + Conds:RS_15._col1=RS_141._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_139] + SHUFFLE [RS_141] PartitionCols:_col0 - Select Operator [SEL_138] (rows=20000000 width=1014) + Select Operator [SEL_140] (rows=20000000 width=1014) Output:["_col0"] - Filter Operator [FIL_137] (rows=20000000 width=1014) + Filter Operator [FIL_139] (rows=20000000 width=1014) predicate:((ca_state = 'TX') and ca_address_sk is not null) TableScan [TS_6] (rows=40000000 width=1014) default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_15] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_124] (rows=158402938 width=135) - Conds:RS_155._col0=RS_131._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] + Merge Join Operator [MERGEJOIN_126] (rows=158402938 width=135) + Conds:RS_157._col0=RS_133._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_131] + SHUFFLE [RS_133] PartitionCols:_col0 - Select Operator [SEL_130] (rows=8116 width=1119) + Select Operator [SEL_132] (rows=8116 width=1119) Output:["_col0"] - Filter Operator [FIL_129] (rows=8116 width=1119) + Filter Operator [FIL_131] (rows=8116 width=1119) predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=1119) 
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_155] + SHUFFLE [RS_157] PartitionCols:_col0 - Select Operator [SEL_154] (rows=144002668 width=135) + Select Operator [SEL_156] (rows=144002668 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_153] (rows=144002668 width=135) + Filter Operator [FIL_155] (rows=144002668 width=135) predicate:((ws_ship_addr_sk BETWEEN DynamicValue(RS_16_customer_address_ca_address_sk_min) AND DynamicValue(RS_16_customer_address_ca_address_sk_max) and in_bloom_filter(ws_ship_addr_sk, DynamicValue(RS_16_customer_address_ca_address_sk_bloom_filter))) and (ws_ship_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(ws_ship_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) and (ws_web_site_sk BETWEEN DynamicValue(RS_19_web_site_web_site_sk_min) AND DynamicValue(RS_19_web_site_web_site_sk_max) and in_bloom_filter(ws_web_site_sk, DynamicValue(RS_19_web_site_web_site_sk_bloom_filter))) and ws_order_number is not null and ws_ship_addr_sk is not null and ws_ship_date_sk is not null and ws_web_site_sk is not null) TableScan [TS_0] (rows=144002668 width=135) default@web_sales,ws1,Tbl:COMPLETE,Col:NONE,Output:["ws_ship_date_sk","ws_ship_addr_sk","ws_web_site_sk","ws_warehouse_sk","ws_order_number","ws_ext_ship_cost","ws_net_profit"] <-Reducer 11 [BROADCAST_EDGE] vectorized - BROADCAST [RS_136] - Group By Operator [GBY_135] (rows=1 width=12) + BROADCAST [RS_138] + Group By Operator [GBY_137] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_134] - Group By Operator [GBY_133] (rows=1 width=12) + SHUFFLE [RS_136] + Group By Operator [GBY_135] (rows=1 width=12) 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_132] (rows=8116 width=1119) + Select Operator [SEL_134] (rows=8116 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_130] + Please refer to the previous Select Operator [SEL_132] <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_144] - Group By Operator [GBY_143] (rows=1 width=12) + BROADCAST [RS_146] + Group By Operator [GBY_145] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=20000000)"] <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_142] - Group By Operator [GBY_141] (rows=1 width=12) + SHUFFLE [RS_144] + Group By Operator [GBY_143] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=20000000)"] - Select Operator [SEL_140] (rows=20000000 width=1014) + Select Operator [SEL_142] (rows=20000000 width=1014) Output:["_col0"] - Please refer to the previous Select Operator [SEL_138] + Please refer to the previous Select Operator [SEL_140] <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_152] - Group By Operator [GBY_151] (rows=1 width=12) + BROADCAST [RS_154] + Group By Operator [GBY_153] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_150] - Group By Operator [GBY_149] (rows=1 width=12) + SHUFFLE [RS_152] + Group By Operator [GBY_151] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_148] (rows=42 width=1850) + Select Operator [SEL_150] (rows=42 width=1850) Output:["_col0"] - Please refer to the previous Select Operator [SEL_146] + 
Please refer to the previous Select Operator [SEL_148] diff --git a/ql/src/test/results/clientpositive/perf/tez/query95.q.out b/ql/src/test/results/clientpositive/perf/tez/query95.q.out index 400cc19581..3a8ed092fb 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query95.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query95.q.out @@ -63,22 +63,22 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Reducer 10 (BROADCAST_EDGE), Reducer 12 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE) -Map 19 <- Reducer 25 (BROADCAST_EDGE) -Map 23 <- Reducer 25 (BROADCAST_EDGE) +Map 1 <- Reducer 10 (BROADCAST_EDGE), Reducer 12 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE), Reducer 23 (BROADCAST_EDGE) +Map 15 <- Reducer 23 (BROADCAST_EDGE) +Map 21 <- Reducer 23 (BROADCAST_EDGE) Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE) Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) -Reducer 16 <- Map 15 (SIMPLE_EDGE), Map 18 (SIMPLE_EDGE) -Reducer 17 <- Reducer 16 (SIMPLE_EDGE) +Reducer 16 <- Map 15 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) +Reducer 17 <- Map 22 (SIMPLE_EDGE), Reducer 16 (ONE_TO_ONE_EDGE) +Reducer 18 <- Reducer 17 (SIMPLE_EDGE) +Reducer 19 <- Map 15 (SIMPLE_EDGE), Map 21 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) -Reducer 20 <- Map 19 (SIMPLE_EDGE), Map 23 (SIMPLE_EDGE) -Reducer 21 <- Map 24 (SIMPLE_EDGE), Reducer 20 (ONE_TO_ONE_EDGE) -Reducer 22 <- Reducer 21 (SIMPLE_EDGE) -Reducer 25 <- Map 24 (CUSTOM_SIMPLE_EDGE) +Reducer 20 <- Reducer 19 (SIMPLE_EDGE) +Reducer 23 <- Map 22 (CUSTOM_SIMPLE_EDGE) Reducer 3 <- Map 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Map 13 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 17 (ONE_TO_ONE_EDGE), Reducer 22 (ONE_TO_ONE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 5 <- Reducer 18 (ONE_TO_ONE_EDGE), Reducer 20 (ONE_TO_ONE_EDGE), Reducer 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 
(CUSTOM_SIMPLE_EDGE) Reducer 8 <- Reducer 7 (SIMPLE_EDGE) @@ -88,208 +88,201 @@ Stage-0 limit:-1 Stage-1 Reducer 8 vectorized - File Output Operator [FS_273] - Limit [LIM_272] (rows=1 width=344) + File Output Operator [FS_286] + Limit [LIM_285] (rows=1 width=344) Number of rows:100 - Select Operator [SEL_271] (rows=1 width=344) + Select Operator [SEL_284] (rows=1 width=344) Output:["_col0","_col1","_col2"] <-Reducer 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_270] - Select Operator [SEL_269] (rows=1 width=344) + SHUFFLE [RS_283] + Select Operator [SEL_282] (rows=1 width=344) Output:["_col1","_col2","_col3"] - Group By Operator [GBY_268] (rows=1 width=344) + Group By Operator [GBY_281] (rows=1 width=344) Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"] <-Reducer 6 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_267] - Group By Operator [GBY_266] (rows=1 width=344) + PARTITION_ONLY_SHUFFLE [RS_280] + Group By Operator [GBY_279] (rows=1 width=344) Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","sum(_col1)","sum(_col2)"] - Group By Operator [GBY_265] (rows=421668645 width=135) + Group By Operator [GBY_278] (rows=421668645 width=135) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_116] PartitionCols:_col0 Group By Operator [GBY_115] (rows=421668645 width=135) Output:["_col0","_col2","_col3"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col3 - Merge Join Operator [MERGEJOIN_212] (rows=421668645 width=135) - Conds:RS_58._col3=RS_247._col0(Inner),RS_58._col3=RS_264._col0(Inner),Output:["_col3","_col4","_col5"] - <-Reducer 17 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_247] + Merge Join Operator [MERGEJOIN_228] (rows=421668645 width=135) + Conds:RS_58._col3=RS_277._col0(Inner),RS_58._col3=RS_275._col0(Inner),Output:["_col3","_col4","_col5"] + <-Reducer 18 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_275] 
PartitionCols:_col0 - Group By Operator [GBY_246] (rows=79201469 width=135) + Group By Operator [GBY_274] (rows=87121617 width=135) Output:["_col0"],keys:KEY._col0 - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_24] - PartitionCols:_col0 - Group By Operator [GBY_23] (rows=158402938 width=135) - Output:["_col0"],keys:_col1 - Select Operator [SEL_22] (rows=158402938 width=135) - Output:["_col1"] - Filter Operator [FIL_21] (rows=158402938 width=135) - predicate:(_col0 <> _col2) - Merge Join Operator [MERGEJOIN_209] (rows=158402938 width=135) - Conds:RS_242._col1=RS_245._col1(Inner),Output:["_col0","_col1","_col2"] - <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_242] - PartitionCols:_col1 - Select Operator [SEL_241] (rows=144002668 width=135) - Output:["_col0","_col1"] - Filter Operator [FIL_240] (rows=144002668 width=135) - predicate:ws_order_number is not null - TableScan [TS_12] (rows=144002668 width=135) - default@web_sales,ws1,Tbl:COMPLETE,Col:NONE,Output:["ws_warehouse_sk","ws_order_number"] - <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_245] - PartitionCols:_col1 - Select Operator [SEL_244] (rows=144002668 width=135) - Output:["_col0","_col1"] - Filter Operator [FIL_243] (rows=144002668 width=135) - predicate:ws_order_number is not null - TableScan [TS_15] (rows=144002668 width=135) - default@web_sales,ws2,Tbl:COMPLETE,Col:NONE,Output:["ws_warehouse_sk","ws_order_number"] - <-Reducer 22 [ONE_TO_ONE_EDGE] vectorized - FORWARD [RS_264] - PartitionCols:_col0 - Group By Operator [GBY_263] (rows=87121617 width=135) - Output:["_col0"],keys:KEY._col0 - <-Reducer 21 [SIMPLE_EDGE] + <-Reducer 17 [SIMPLE_EDGE] SHUFFLE [RS_46] PartitionCols:_col0 Group By Operator [GBY_45] (rows=174243235 width=135) Output:["_col0"],keys:_col1 - Merge Join Operator [MERGEJOIN_211] (rows=174243235 width=135) - Conds:RS_41._col0=RS_250._col0(Inner),Output:["_col1"] - <-Map 24 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_250] + Merge Join Operator [MERGEJOIN_227] (rows=174243235 
width=135) + Conds:RS_41._col0=RS_255._col0(Inner),Output:["_col1"] + <-Map 22 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_255] PartitionCols:_col0 - Select Operator [SEL_249] (rows=14398467 width=92) + Select Operator [SEL_254] (rows=14398467 width=92) Output:["_col0"] - Filter Operator [FIL_248] (rows=14398467 width=92) + Filter Operator [FIL_253] (rows=14398467 width=92) predicate:wr_order_number is not null TableScan [TS_38] (rows=14398467 width=92) default@web_returns,web_returns,Tbl:COMPLETE,Col:NONE,Output:["wr_order_number"] - <-Reducer 20 [ONE_TO_ONE_EDGE] + <-Reducer 16 [ONE_TO_ONE_EDGE] FORWARD [RS_41] PartitionCols:_col0 Select Operator [SEL_37] (rows=158402938 width=135) Output:["_col0"] Filter Operator [FIL_36] (rows=158402938 width=135) predicate:(_col0 <> _col2) - Merge Join Operator [MERGEJOIN_210] (rows=158402938 width=135) - Conds:RS_259._col1=RS_262._col1(Inner),Output:["_col0","_col1","_col2"] - <-Map 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_259] + Merge Join Operator [MERGEJOIN_226] (rows=158402938 width=135) + Conds:RS_268._col1=RS_272._col1(Inner),Output:["_col0","_col1","_col2"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_268] PartitionCols:_col1 - Select Operator [SEL_258] (rows=144002668 width=135) + Select Operator [SEL_267] (rows=144002668 width=135) Output:["_col0","_col1"] - Filter Operator [FIL_257] (rows=144002668 width=135) + Filter Operator [FIL_266] (rows=144002668 width=135) predicate:((ws_order_number BETWEEN DynamicValue(RS_42_web_returns_wr_order_number_min) AND DynamicValue(RS_42_web_returns_wr_order_number_max) and in_bloom_filter(ws_order_number, DynamicValue(RS_42_web_returns_wr_order_number_bloom_filter))) and ws_order_number is not null) TableScan [TS_27] (rows=144002668 width=135) default@web_sales,ws1,Tbl:COMPLETE,Col:NONE,Output:["ws_warehouse_sk","ws_order_number"] - <-Reducer 25 [BROADCAST_EDGE] vectorized - BROADCAST [RS_255] - Group By Operator [GBY_254] (rows=1 width=12) + <-Reducer 23 
[BROADCAST_EDGE] vectorized + BROADCAST [RS_261] + Group By Operator [GBY_259] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=14398467)"] - <-Map 24 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_253] - Group By Operator [GBY_252] (rows=1 width=12) + <-Map 22 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_258] + Group By Operator [GBY_257] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=14398467)"] - Select Operator [SEL_251] (rows=14398467 width=92) + Select Operator [SEL_256] (rows=14398467 width=92) Output:["_col0"] - Please refer to the previous Select Operator [SEL_249] - <-Map 23 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_262] + Please refer to the previous Select Operator [SEL_254] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_272] PartitionCols:_col1 - Select Operator [SEL_261] (rows=144002668 width=135) + Select Operator [SEL_271] (rows=144002668 width=135) Output:["_col0","_col1"] - Filter Operator [FIL_260] (rows=144002668 width=135) + Filter Operator [FIL_270] (rows=144002668 width=135) predicate:((ws_order_number BETWEEN DynamicValue(RS_42_web_returns_wr_order_number_min) AND DynamicValue(RS_42_web_returns_wr_order_number_max) and in_bloom_filter(ws_order_number, DynamicValue(RS_42_web_returns_wr_order_number_bloom_filter))) and ws_order_number is not null) TableScan [TS_30] (rows=144002668 width=135) default@web_sales,ws2,Tbl:COMPLETE,Col:NONE,Output:["ws_warehouse_sk","ws_order_number"] - <-Reducer 25 [BROADCAST_EDGE] vectorized - BROADCAST [RS_256] - Please refer to the previous Group By Operator [GBY_254] + <-Reducer 23 [BROADCAST_EDGE] vectorized + BROADCAST [RS_262] + Please refer to the previous Group By Operator [GBY_259] + <-Reducer 20 [ONE_TO_ONE_EDGE] vectorized + FORWARD [RS_277] + PartitionCols:_col0 + Group By Operator [GBY_276] 
(rows=79201469 width=135) + Output:["_col0"],keys:KEY._col0 + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_24] + PartitionCols:_col0 + Group By Operator [GBY_23] (rows=158402938 width=135) + Output:["_col0"],keys:_col1 + Select Operator [SEL_22] (rows=158402938 width=135) + Output:["_col1"] + Filter Operator [FIL_21] (rows=158402938 width=135) + predicate:(_col0 <> _col2) + Merge Join Operator [MERGEJOIN_225] (rows=158402938 width=135) + Conds:RS_269._col1=RS_273._col1(Inner),Output:["_col0","_col1","_col2"] + <-Map 15 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_269] + PartitionCols:_col1 + Please refer to the previous Select Operator [SEL_267] + <-Map 21 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_273] + PartitionCols:_col1 + Please refer to the previous Select Operator [SEL_271] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_58] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_208] (rows=191667562 width=135) - Conds:RS_55._col2=RS_231._col0(Inner),Output:["_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_224] (rows=191667562 width=135) + Conds:RS_55._col2=RS_247._col0(Inner),Output:["_col3","_col4","_col5"] <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_231] + SHUFFLE [RS_247] PartitionCols:_col0 - Select Operator [SEL_230] (rows=42 width=1850) + Select Operator [SEL_246] (rows=42 width=1850) Output:["_col0"] - Filter Operator [FIL_229] (rows=42 width=1850) + Filter Operator [FIL_245] (rows=42 width=1850) predicate:((web_company_name = 'pri') and web_site_sk is not null) TableScan [TS_9] (rows=84 width=1850) default@web_site,web_site,Tbl:COMPLETE,Col:NONE,Output:["web_site_sk","web_company_name"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_55] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_207] (rows=174243235 width=135) - Conds:RS_52._col1=RS_223._col0(Inner),Output:["_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_223] (rows=174243235 width=135) + Conds:RS_52._col1=RS_239._col0(Inner),Output:["_col2","_col3","_col4","_col5"] <-Map 11 [SIMPLE_EDGE] 
vectorized - SHUFFLE [RS_223] + SHUFFLE [RS_239] PartitionCols:_col0 - Select Operator [SEL_222] (rows=20000000 width=1014) + Select Operator [SEL_238] (rows=20000000 width=1014) Output:["_col0"] - Filter Operator [FIL_221] (rows=20000000 width=1014) + Filter Operator [FIL_237] (rows=20000000 width=1014) predicate:((ca_state = 'TX') and ca_address_sk is not null) TableScan [TS_6] (rows=40000000 width=1014) default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_52] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_206] (rows=158402938 width=135) - Conds:RS_239._col0=RS_215._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_222] (rows=158402938 width=135) + Conds:RS_265._col0=RS_231._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_215] + SHUFFLE [RS_231] PartitionCols:_col0 - Select Operator [SEL_214] (rows=8116 width=1119) + Select Operator [SEL_230] (rows=8116 width=1119) Output:["_col0"] - Filter Operator [FIL_213] (rows=8116 width=1119) + Filter Operator [FIL_229] (rows=8116 width=1119) predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=1119) default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_239] + SHUFFLE [RS_265] PartitionCols:_col0 - Select Operator [SEL_238] (rows=144002668 width=135) + Select Operator [SEL_264] (rows=144002668 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_237] (rows=144002668 width=135) - predicate:((ws_ship_addr_sk BETWEEN DynamicValue(RS_53_customer_address_ca_address_sk_min) AND DynamicValue(RS_53_customer_address_ca_address_sk_max) and in_bloom_filter(ws_ship_addr_sk, 
DynamicValue(RS_53_customer_address_ca_address_sk_bloom_filter))) and (ws_ship_date_sk BETWEEN DynamicValue(RS_50_date_dim_d_date_sk_min) AND DynamicValue(RS_50_date_dim_d_date_sk_max) and in_bloom_filter(ws_ship_date_sk, DynamicValue(RS_50_date_dim_d_date_sk_bloom_filter))) and (ws_web_site_sk BETWEEN DynamicValue(RS_56_web_site_web_site_sk_min) AND DynamicValue(RS_56_web_site_web_site_sk_max) and in_bloom_filter(ws_web_site_sk, DynamicValue(RS_56_web_site_web_site_sk_bloom_filter))) and ws_order_number is not null and ws_ship_addr_sk is not null and ws_ship_date_sk is not null and ws_web_site_sk is not null) + Filter Operator [FIL_263] (rows=144002668 width=135) + predicate:((ws_order_number BETWEEN DynamicValue(RS_42_web_returns_wr_order_number_min) AND DynamicValue(RS_42_web_returns_wr_order_number_max) and in_bloom_filter(ws_order_number, DynamicValue(RS_42_web_returns_wr_order_number_bloom_filter))) and (ws_ship_addr_sk BETWEEN DynamicValue(RS_53_customer_address_ca_address_sk_min) AND DynamicValue(RS_53_customer_address_ca_address_sk_max) and in_bloom_filter(ws_ship_addr_sk, DynamicValue(RS_53_customer_address_ca_address_sk_bloom_filter))) and (ws_ship_date_sk BETWEEN DynamicValue(RS_50_date_dim_d_date_sk_min) AND DynamicValue(RS_50_date_dim_d_date_sk_max) and in_bloom_filter(ws_ship_date_sk, DynamicValue(RS_50_date_dim_d_date_sk_bloom_filter))) and (ws_web_site_sk BETWEEN DynamicValue(RS_56_web_site_web_site_sk_min) AND DynamicValue(RS_56_web_site_web_site_sk_max) and in_bloom_filter(ws_web_site_sk, DynamicValue(RS_56_web_site_web_site_sk_bloom_filter))) and ws_order_number is not null and ws_ship_addr_sk is not null and ws_ship_date_sk is not null and ws_web_site_sk is not null) TableScan [TS_0] (rows=144002668 width=135) default@web_sales,ws1,Tbl:COMPLETE,Col:NONE,Output:["ws_ship_date_sk","ws_ship_addr_sk","ws_web_site_sk","ws_order_number","ws_ext_ship_cost","ws_net_profit"] + <-Reducer 23 [BROADCAST_EDGE] vectorized + BROADCAST [RS_260] + Please refer 
to the previous Group By Operator [GBY_259] <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_220] - Group By Operator [GBY_219] (rows=1 width=12) + BROADCAST [RS_236] + Group By Operator [GBY_235] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_218] - Group By Operator [GBY_217] (rows=1 width=12) + SHUFFLE [RS_234] + Group By Operator [GBY_233] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_216] (rows=8116 width=1119) + Select Operator [SEL_232] (rows=8116 width=1119) Output:["_col0"] - Please refer to the previous Select Operator [SEL_214] + Please refer to the previous Select Operator [SEL_230] <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_228] - Group By Operator [GBY_227] (rows=1 width=12) + BROADCAST [RS_244] + Group By Operator [GBY_243] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=20000000)"] <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_226] - Group By Operator [GBY_225] (rows=1 width=12) + SHUFFLE [RS_242] + Group By Operator [GBY_241] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=20000000)"] - Select Operator [SEL_224] (rows=20000000 width=1014) + Select Operator [SEL_240] (rows=20000000 width=1014) Output:["_col0"] - Please refer to the previous Select Operator [SEL_222] + Please refer to the previous Select Operator [SEL_238] <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_236] - Group By Operator [GBY_235] (rows=1 width=12) + BROADCAST [RS_252] + Group By Operator [GBY_251] (rows=1 width=12) 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_234] - Group By Operator [GBY_233] (rows=1 width=12) + SHUFFLE [RS_250] + Group By Operator [GBY_249] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_232] (rows=42 width=1850) + Select Operator [SEL_248] (rows=42 width=1850) Output:["_col0"] - Please refer to the previous Select Operator [SEL_230] + Please refer to the previous Select Operator [SEL_246] diff --git a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_3.q.out b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_3.q.out index eafc1c4a00..a1414090f3 100644 --- a/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_3.q.out +++ b/ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_3.q.out @@ -366,7 +366,7 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Spark Partition Pruning Sink Operator - Target Columns: [Map 4 -> [part_col:int (part_col)]] + Target Columns: [Map 1 -> [part_col:int (part_col)], Map 4 -> [part_col:int (part_col)]] Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work @@ -432,7 +432,6 @@ STAGE PLANS: Map Operator Tree: TableScan alias: partitioned_table1 - filterExpr: (part_col > 1) (type: boolean) Statistics: Num rows: 12 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: part_col (type: int)