diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 99c26ce..9659eab 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1645,6 +1645,9 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "If the skew information is correctly stored in the metadata, hive.optimize.skewjoin.compiletime\n" + "would change the query plan to take care of it, and hive.optimize.skewjoin will be a no-op."), + HIVE_SHARED_SCAN_OPTIMIZATION("hive.optimize.shared.scan", true, + "Whether to enable shared scan optimizer"), + // CTE HIVE_CTE_MATERIALIZE_THRESHOLD("hive.optimize.cte.materialize.threshold", -1, "If the number of references to a CTE clause exceeds this threshold, Hive will materialize it\n" + diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java index d0fdb52..59b8799 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java @@ -281,7 +281,8 @@ private void convertJoinSMBJoin(JoinOperator joinOp, OptimizeTezProcContext cont joinOp.getConf().getBaseSrc(), joinOp).getSecond(), null, joinDesc.getExprs(), null, null, joinDesc.getOutputColumnNames(), mapJoinConversionPos, joinDesc.getConds(), - joinDesc.getFilters(), joinDesc.getNoOuterJoin(), null, joinDesc.getNoConditionalTaskSize()); + joinDesc.getFilters(), joinDesc.getNoOuterJoin(), null, + joinDesc.getNoConditionalTaskSize(), joinDesc.getInMemoryDataSize()); mapJoinDesc.setNullSafes(joinDesc.getNullSafes()); mapJoinDesc.setFilterMap(joinDesc.getFilterMap()); mapJoinDesc.setResidualFilterExprs(joinDesc.getResidualFilterExprs()); @@ -419,7 +420,6 @@ private boolean checkConvertJoinSMBJoin(JoinOperator joinOp, OptimizeTezProcCont // each side better 
have 0 or more RS. if either side is unbalanced, cannot convert. // This is a workaround for now. Right fix would be to refactor code in the // MapRecordProcessor and ReduceRecordProcessor with respect to the sources. - @SuppressWarnings({"rawtypes","unchecked"}) Set set = OperatorUtils.findOperatorsUpstream(parentOp.getParentOperators(), ReduceSinkOperator.class); @@ -719,6 +719,8 @@ public int getMapJoinConversionPos(JoinOperator joinOp, OptimizeTezProcContext c } + joinOp.getConf().setInMemoryDataSize(totalSize/buckets); + return bigTablePosition; } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java index 85d46f3..f01fb9c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java @@ -29,8 +29,6 @@ import java.util.Set; import java.util.Stack; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.ObjectPair; import org.apache.hadoop.hive.conf.HiveConf; @@ -78,6 +76,8 @@ import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Implementation of one of the rule-based map join optimization. 
User passes hints to specify @@ -432,7 +432,7 @@ public static MapJoinOperator convertSMBJoinToMapJoin(HiveConf hconf, smbJoinDesc.getOutputColumnNames(), bigTablePos, smbJoinDesc.getConds(), smbJoinDesc.getFilters(), smbJoinDesc.isNoOuterJoin(), smbJoinDesc.getDumpFilePrefix(), - smbJoinDesc.getNoConditionalTaskSize()); + smbJoinDesc.getNoConditionalTaskSize(), smbJoinDesc.getInMemoryDataSize()); mapJoinDesc.setStatistics(smbJoinDesc.getStatistics()); @@ -1184,8 +1184,9 @@ public static MapJoinDesc getMapJoinDesc(HiveConf hconf, JoinCondDesc[] joinCondns = op.getConf().getConds(); MapJoinDesc mapJoinDescriptor = new MapJoinDesc(keyExprMap, keyTableDesc, newValueExprs, valueTableDescs, - valueFilteredTableDescs, outputColumnNames, mapJoinPos, joinCondns, filters, op - .getConf().getNoOuterJoin(), dumpFilePrefix, op.getConf().getNoConditionalTaskSize()); + valueFilteredTableDescs, outputColumnNames, mapJoinPos, joinCondns, filters, + op.getConf().getNoOuterJoin(), dumpFilePrefix, + op.getConf().getNoConditionalTaskSize(), op.getConf().getInMemoryDataSize()); mapJoinDescriptor.setStatistics(op.getConf().getStatistics()); mapJoinDescriptor.setTagOrder(tagOrder); mapJoinDescriptor.setNullSafes(desc.getNullSafes()); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedScanOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedScanOptimizer.java new file mode 100644 index 0000000..bb909dc --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedScanOptimizer.java @@ -0,0 +1,437 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.optimizer; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; + +import org.apache.hadoop.hive.ql.exec.AppMasterEventOperator; +import org.apache.hadoop.hive.ql.exec.FilterOperator; +import org.apache.hadoop.hive.ql.exec.MapJoinOperator; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.OperatorFactory; +import org.apache.hadoop.hive.ql.exec.OperatorUtils; +import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; +import org.apache.hadoop.hive.ql.exec.TableScanOperator; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.parse.ParseContext; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.parse.SemiJoinBranchInfo; +import org.apache.hadoop.hive.ql.plan.DynamicPruningEventDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.plan.FilterDesc; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.stats.StatsUtils; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr; +import org.slf4j.Logger; +import 
org.slf4j.LoggerFactory; + +import com.google.common.collect.ArrayListMultimap; +import com.google.common.collect.Lists; +import com.google.common.collect.Multimap; + +/** + * Shared scan optimizer. + */ +public class SharedScanOptimizer extends Transform { + + private final static Logger LOG = LoggerFactory.getLogger(SharedScanOptimizer.class); + + @Override + public ParseContext transform(ParseContext pctx) throws SemanticException { + + final Map topOps = pctx.getTopOps(); + if (topOps.size() < 2) { + // Nothing to do, bail out + return pctx; + } + + if (LOG.isDebugEnabled()) { + LOG.debug("Plan before shared scan optimizer:\n" + Operator.toString(pctx.getTopOps().values())); + } + + // We will not apply this optimization on some table scan operators. + Set excludeTableScanOps = gatherNotValidTableScanOps(pctx); + LOG.debug("Exclude TableScan ops: {}", excludeTableScanOps); + + Multimap> tableNameToOps = splitTableScanOpsByTable(pctx); + + // We enforce a certain order when we do the reutilization. + // In particular, we use size of table x number of reads to + // rank the tables. + List> sortedTables = rankTablesByAccumulatedSize(pctx, excludeTableScanOps); + LOG.debug("Sorted tables by size: {}", sortedTables); + + // Execute optimization + Multimap existingOps = ArrayListMultimap.create(); + Set entriesToRemove = new HashSet<>(); + for (Entry tablePair : sortedTables) { + for (Entry tableScanOpPair : tableNameToOps.get(tablePair.getKey())) { + TableScanOperator tsOp = tableScanOpPair.getValue(); + String tableName = tsOp.getConf().getTableMetadata().getDbName() + "." 
+ + tsOp.getConf().getTableMetadata().getTableName(); + Collection prevTsOps = existingOps.get(tableName); + if (!prevTsOps.isEmpty()) { + for (TableScanOperator prevTsOp : prevTsOps) { + + // First we check if the two table scan operators can actually be merged + // If schemas do not match, we currently do not merge + List prevTsOpNeededColumns = prevTsOp.getNeededColumns(); + List tsOpNeededColumns = tsOp.getNeededColumns(); + if (prevTsOpNeededColumns.size() != tsOpNeededColumns.size()) { + // Skip + continue; + } + boolean notEqual = false; + for (int i = 0; i < prevTsOpNeededColumns.size(); i++) { + if (!prevTsOpNeededColumns.get(i).equals(tsOpNeededColumns.get(i))) { + notEqual = true; + break; + } + } + if (notEqual) { + // Skip + continue; + } + // If row limit does not match, we currently do not merge + if (prevTsOp.getConf().getRowLimit() != tsOp.getConf().getRowLimit()) { + // Skip + continue; + } + + // It seems these two operators can be merged. + // Check that plan meets some preconditions before doing it. + // In particular, in the presence of map joins in the upstream plan: + // - we cannot exceed the noconditional task size, and + // - if we already merged the big table, we cannot merge the broadcast + // tables. 
+ if (!validPreConditions(pctx, prevTsOp, tsOp)) { + // Skip + LOG.debug("{} does not meet preconditions", tsOp); + continue; + } + + // We can merge + ExprNodeGenericFuncDesc exprNode = null; + if (prevTsOp.getConf().getFilterExpr() != null) { + // Push filter on top of children + pushFilterToTopOfTableScan(prevTsOp); + // Clone to push to table scan + exprNode = (ExprNodeGenericFuncDesc) prevTsOp.getConf().getFilterExpr(); + } + if (tsOp.getConf().getFilterExpr() != null) { + // Push filter on top + pushFilterToTopOfTableScan(tsOp); + ExprNodeGenericFuncDesc tsExprNode = tsOp.getConf().getFilterExpr(); + if (exprNode != null && !exprNode.isSame(tsExprNode)) { + if (exprNode.getGenericUDF() instanceof GenericUDFOPOr) { + List newChildren = new ArrayList<>(exprNode.getChildren().size() + 1); + for (ExprNodeDesc childExprNode : exprNode.getChildren()) { + if (childExprNode.isSame(tsExprNode)) { + // We do not need to do anything, it is in the OR expression + break; + } + newChildren.add(childExprNode); + } + if (exprNode.getChildren().size() == newChildren.size()) { + newChildren.add(tsExprNode); + exprNode = ExprNodeGenericFuncDesc.newInstance( + new GenericUDFOPOr(), + newChildren); + } + } else { + exprNode = ExprNodeGenericFuncDesc.newInstance( + new GenericUDFOPOr(), + Arrays.asList(exprNode, tsExprNode)); + } + } + } + // Replace filter + prevTsOp.getConf().setFilterExpr(exprNode); + // Replace table scan operator + List> allChildren = + Lists.newArrayList(tsOp.getChildOperators()); + for (Operator op : allChildren) { + tsOp.removeChild(op); + op.getParentOperators().remove(tsOp); + prevTsOp.getChildOperators().add(op); + op.getParentOperators().add(prevTsOp); + } + entriesToRemove.add(tableScanOpPair.getKey()); + break; + } + if (!entriesToRemove.contains(tableScanOpPair.getKey())) { + existingOps.put(tableName, tsOp); + } + } else { + // Add to existing ops + existingOps.put(tableName, tsOp); + } + } + } + // Remove unused operators + for (String key : 
entriesToRemove) { + topOps.remove(key); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("Plan after shared scan optimizer:\n" + Operator.toString(pctx.getTopOps().values())); + } + + return pctx; + } + + private static Set gatherNotValidTableScanOps(ParseContext pctx) { + // Find table scan operators with partition pruning enabled in plan. + // These can be: + // 1) TS with static partitioning. + // 2) TS with DPP. + // 3) TS with semijoin DPP. + Set notValidTableScanOps = new HashSet<>(); + // 1) TS with static partitioning. + Map topOps = pctx.getTopOps(); + for (TableScanOperator tsOp : topOps.values()) { + if (tsOp.getConf().getPartColumns() != null && + !tsOp.getConf().getPartColumns().isEmpty()) { + notValidTableScanOps.add(tsOp); + } + } + // 2) TS with DPP. + Collection> tableScanOps = + Lists.>newArrayList(topOps.values()); + Set s = + OperatorUtils.findOperators(tableScanOps, AppMasterEventOperator.class); + for (AppMasterEventOperator a : s) { + if (a.getConf() instanceof DynamicPruningEventDesc) { + notValidTableScanOps.add(((DynamicPruningEventDesc) a.getConf()).getTableScan()); + } + } + // 3) TS with semijoin DPP. + for (SemiJoinBranchInfo sjbi : pctx.getRsToSemiJoinBranchInfo().values()) { + notValidTableScanOps.add(sjbi.getTsOp()); + } + return notValidTableScanOps; + } + + private static Multimap> splitTableScanOpsByTable( + ParseContext pctx) { + Multimap> tableNameToOps = ArrayListMultimap.create(); + for (Entry e : pctx.getTopOps().entrySet()) { + TableScanOperator tsOp = e.getValue(); + tableNameToOps.put( + tsOp.getConf().getTableMetadata().getDbName() + "." 
+ + tsOp.getConf().getTableMetadata().getTableName(), e); + } + return tableNameToOps; + } + + private static List> rankTablesByAccumulatedSize(ParseContext pctx, + Set excludeTables) { + Map tableToTotalSize = new HashMap<>(); + for (Entry e : pctx.getTopOps().entrySet()) { + TableScanOperator tsOp = e.getValue(); + if (excludeTables.contains(tsOp)) { + // Skip operator, currently we do not merge + continue; + } + String tableName = tsOp.getConf().getTableMetadata().getDbName() + "." + + tsOp.getConf().getTableMetadata().getTableName(); + long tableSize = tsOp.getStatistics() != null ? + tsOp.getStatistics().getDataSize() : 0L; + Long totalSize = tableToTotalSize.get(tableName); + if (totalSize != null) { + tableToTotalSize.put(tableName, + StatsUtils.safeAdd(totalSize, tableSize)); + } else { + tableToTotalSize.put(tableName, tableSize); + } + } + List> sortedTables = + new LinkedList<>(tableToTotalSize.entrySet()); + Collections.sort(sortedTables, Collections.reverseOrder( + new Comparator>() { + public int compare(Map.Entry o1, Map.Entry o2) { + return (o1.getValue()).compareTo(o2.getValue()); + } + })); + return sortedTables; + } + + private static boolean validPreConditions(ParseContext pctx, + TableScanOperator prevTsOp, TableScanOperator tsOp) { + // First we check whether output stages when we merge the operators + // will collide. + // If we do, we cannot merge. + final Set> outputStagesOps1 = findOutputStagesOperators(prevTsOp); + final Set> outputStagesOps2 = findOutputStagesOperators(tsOp); + if (!Collections.disjoint(outputStagesOps1, outputStagesOps2)) { + // We cannot merge + return false; + } + // Secondly, we check whether we will end up with same operators + // inputting on same stage. + // If we do, we cannot merge. 
+ final Set> inputStagesOps1 = findInputStagesOperators(prevTsOp); + final Set> inputStagesOps2 = findInputStagesOperators(tsOp); + if (!Collections.disjoint(inputStagesOps1, inputStagesOps2)) { + // We cannot merge + return false; + } + // Third, we check whether one of the operators is part of a stage + // that is an input for the stage of the other operator. + // If we do, we cannot merge. + final Set> stageOps1 = findStageOperators(prevTsOp); + final Set> stageOps2 = findStageOperators(tsOp); + if (!Collections.disjoint(stageOps1, inputStagesOps2) + || !Collections.disjoint(inputStagesOps1, stageOps2)) { + return false; + } + // Finally, we check whether merging the stages would cause the size of + // the data in memory grow too large. + final Set> newStageOps = stageOps1; + newStageOps.addAll(stageOps2); + long dataSize = 0L; + for (Operator op : newStageOps) { + if (op instanceof MapJoinOperator) { + MapJoinOperator mop = (MapJoinOperator) op; + dataSize = StatsUtils.safeAdd(dataSize, mop.getConf().getInMemoryDataSize()); + if (dataSize > mop.getConf().getNoConditionalTaskSize()) { + // Size surpasses limit, we cannot convert + LOG.debug("accumulated data size: {} / max size: {}", + dataSize, mop.getConf().getNoConditionalTaskSize()); + return false; + } + } + } + return true; + } + + private static Set> findInputStagesOperators(Operator start) { + // Find operators in stage + Set> stageOps = findStageOperators(start); + // Gather input stages operators + Set> set = new HashSet>(); + for (Operator op : stageOps) { + if (op.getParentOperators() != null) { + for (Operator parent : op.getParentOperators()) { + if (parent instanceof ReduceSinkOperator) { + findStageOperators(parent, set); + } + } + } + } + return set; + } + + private static Set> findOutputStagesOperators(Operator start) { + // Find operators in stage + Set> stageOps = findStageOperators(start); + // Gather output stages operators + Set> set = new HashSet>(); + for (Operator op : stageOps) { + if 
(op instanceof ReduceSinkOperator) { + if (op.getChildOperators() != null) { + for (Operator child : op.getChildOperators()) { + findStageOperators(child, set); + } + } + } + } + return set; + } + + private static Set> findStageOperators(Operator start) { + return findStageOperators(start, new HashSet>()); + } + + private static Set> findStageOperators(Operator start, Set> found) { + found.add(start); + if (start.getParentOperators() != null) { + for (Operator parent : start.getParentOperators()) { + if (parent instanceof ReduceSinkOperator) { + continue; + } + if (!found.contains(parent)) { + findStageOperators(parent, found); + } + } + } + if (start instanceof ReduceSinkOperator) { + return found; + } + if (start.getChildOperators() != null) { + for (Operator child : start.getChildOperators()) { + if (!found.contains(child)) { + findStageOperators(child, found); + } + } + } + return found; + } + + private static void pushFilterToTopOfTableScan(TableScanOperator tsOp) throws UDFArgumentException { + ExprNodeGenericFuncDesc tableScanExprNode = tsOp.getConf().getFilterExpr(); + List> allChildren = + Lists.newArrayList(tsOp.getChildOperators()); + for (Operator op : allChildren) { + if (op instanceof FilterOperator) { + FilterOperator filterOp = (FilterOperator) op; + ExprNodeDesc filterExprNode = filterOp.getConf().getPredicate(); + if (tableScanExprNode.isSame(filterExprNode)) { + // We do not need to do anything + return; + } + if (tableScanExprNode.getGenericUDF() instanceof GenericUDFOPOr) { + for (ExprNodeDesc childExprNode : tableScanExprNode.getChildren()) { + if (childExprNode.isSame(filterExprNode)) { + // We do not need to do anything, it is in the OR expression + // so probably we pushed previously + return; + } + } + } + ExprNodeGenericFuncDesc newPred = ExprNodeGenericFuncDesc.newInstance( + new GenericUDFOPAnd(), + Arrays.asList(tableScanExprNode.clone(), filterExprNode)); + filterOp.getConf().setPredicate(newPred); + } else { + Operator newOp = 
OperatorFactory.getAndMakeChild( + new FilterDesc(tableScanExprNode.clone(), false), + tsOp); + tsOp.removeChild(op); + op.getParentOperators().remove(tsOp); + newOp.getChildOperators().add(op); + op.getParentOperators().add(newOp); + } + } + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java index f78bd7c..53abb21 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java @@ -282,7 +282,7 @@ public static void processSkewJoin(JoinOperator joinOp, newJoinValues, newJoinValueTblDesc, newJoinValueTblDesc,joinDescriptor .getOutputColumnNames(), i, joinDescriptor.getConds(), joinDescriptor.getFilters(), joinDescriptor.getNoOuterJoin(), dumpFilePrefix, - joinDescriptor.getNoConditionalTaskSize()); + joinDescriptor.getNoConditionalTaskSize(), joinDescriptor.getInMemoryDataSize()); mapJoinDescriptor.setTagOrder(tags); mapJoinDescriptor.setHandleSkewJoin(false); mapJoinDescriptor.setNullSafes(joinDescriptor.getNullSafes()); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenSparkSkewJoinProcessor.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenSparkSkewJoinProcessor.java index c970611..a5f0b2a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenSparkSkewJoinProcessor.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenSparkSkewJoinProcessor.java @@ -241,7 +241,7 @@ public static void processSkewJoin(JoinOperator joinOp, Task inputs, PerfLogger perfLogger = SessionState.getPerfLogger(); // Create the context for the walker OptimizeTezProcContext procCtx = new OptimizeTezProcContext(conf, pCtx, inputs, outputs); + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); // setup dynamic partition pruning where possible 
runDynamicPartitionPruning(procCtx, inputs, outputs); @@ -136,6 +182,12 @@ protected void optimizeOperatorPlan(ParseContext pCtx, Set inputs, runCycleAnalysisForPartitionPruning(procCtx, inputs, outputs); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Run cycle analysis for partition pruning"); + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.TEZ_COMPILER); + if(procCtx.conf.getBoolVar(ConfVars.HIVE_SHARED_SCAN_OPTIMIZATION)) { + new SharedScanOptimizer().transform(procCtx.parseContext); + } + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "Shared scans optimization"); + // need a new run of the constant folding because we might have created lots // of "and true and true" conditions. // Rather than run the full constant folding just need to shortcut AND/OR expressions diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java index c4fb3f3..a975c01 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java @@ -106,6 +106,7 @@ private transient Map> aliasToOpInfo; private transient boolean leftInputJoin; private transient List streamAliases; + protected transient long inMemoryDataSize; // non-transient field, used at runtime to kill a task if it exceeded memory limits when running in LLAP protected long noConditionalTaskSize; @@ -202,6 +203,7 @@ public JoinDesc(JoinDesc clone) { this.residualFilterExprs = clone.residualFilterExprs; this.statistics = clone.statistics; this.noConditionalTaskSize = clone.noConditionalTaskSize; + this.inMemoryDataSize = clone.inMemoryDataSize; } public Map> getExprs() { @@ -696,4 +698,12 @@ public long getNoConditionalTaskSize() { public void setNoConditionalTaskSize(final long noConditionalTaskSize) { this.noConditionalTaskSize = noConditionalTaskSize; } + + public long getInMemoryDataSize() { + return inMemoryDataSize; + } + + 
public void setInMemoryDataSize(final long inMemoryDataSize) { + this.inMemoryDataSize = inMemoryDataSize; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java index 8da85d2..f387e6a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java @@ -113,7 +113,7 @@ public MapJoinDesc(final Map> keys, final List valueTblDescs, final List valueFilteredTblDescs, List outputColumnNames, final int posBigTable, final JoinCondDesc[] conds, final Map> filters, boolean noOuterJoin, String dumpFilePrefix, - final long noConditionalTaskSize) { + final long noConditionalTaskSize, final long inMemoryDataSize) { super(values, outputColumnNames, noOuterJoin, conds, filters, null, noConditionalTaskSize); vectorDesc = null; this.keys = keys; @@ -123,6 +123,7 @@ public MapJoinDesc(final Map> keys, this.posBigTable = posBigTable; this.bigTableBucketNumMapping = new LinkedHashMap(); this.dumpFilePrefix = dumpFilePrefix; + this.inMemoryDataSize = inMemoryDataSize; initRetainExprList(); } diff --git ql/src/test/queries/clientpositive/perf/query88.q ql/src/test/queries/clientpositive/perf/query88.q index 2be814e..bb6ef6d 100644 --- ql/src/test/queries/clientpositive/perf/query88.q +++ ql/src/test/queries/clientpositive/perf/query88.q @@ -1,3 +1,5 @@ +set hive.strict.checks.cartesian.product=false; + explain select * from diff --git ql/src/test/results/clientpositive/llap/auto_join30.q.out ql/src/test/results/clientpositive/llap/auto_join30.q.out index a26db55..83a4dd7 100644 --- ql/src/test/results/clientpositive/llap/auto_join30.q.out +++ ql/src/test/results/clientpositive/llap/auto_join30.q.out @@ -24,8 +24,8 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 2 (BROADCAST_EDGE) - Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 1 
(SIMPLE_EDGE), Reducer 2 (BROADCAST_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -44,13 +44,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE @@ -77,7 +70,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 4 + Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator @@ -103,7 +96,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Reducer 5 + Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -169,9 +162,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (BROADCAST_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 4 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -187,13 +180,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, 
_col1 @@ -220,7 +206,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col2, _col3 input vertices: - 1 Reducer 5 + 1 Reducer 4 Statistics: Num rows: 1219 Data size: 216982 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(hash(_col2,_col3)) @@ -246,7 +232,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 + Reducer 4 Execution mode: llap Reduce Operator Tree: Select Operator @@ -311,8 +297,8 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 2 (BROADCAST_EDGE) - Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE), Reducer 2 (BROADCAST_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -328,13 +314,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 @@ -358,7 +337,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 4 + Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator @@ -384,7 +363,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Reducer 5 + Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -457,9 +436,9 @@ STAGE PLANS: #### A 
masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 2 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE) - Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) - Reducer 7 <- Map 6 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE), Reducer 2 (BROADCAST_EDGE), Reducer 5 (BROADCAST_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -478,13 +457,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE @@ -497,13 +469,6 @@ STAGE PLANS: sort order: + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) - Execution mode: llap - LLAP IO: no inputs - Map 6 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE @@ -530,7 +495,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 4 + Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator @@ -548,7 +513,7 @@ STAGE PLANS: outputColumnNames: _col2, _col3 input vertices: 0 Reducer 2 - 2 Reducer 7 + 2 Reducer 5 Statistics: Num rows: 2974 Data size: 529372 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: 
sum(hash(_col2,_col3)) @@ -559,7 +524,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Reducer 5 + Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -574,7 +539,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 + Reducer 5 Execution mode: llap Reduce Operator Tree: Select Operator @@ -650,10 +615,10 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE) - Reducer 8 <- Map 7 (SIMPLE_EDGE) + Reducer 5 <- Map 1 (SIMPLE_EDGE) + Reducer 6 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -669,13 +634,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 @@ -685,13 +643,6 @@ STAGE PLANS: sort order: + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) - Execution mode: llap - LLAP IO: no inputs - Map 7 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: 
string) outputColumnNames: _col0, _col1 @@ -752,7 +703,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 5 Execution mode: llap Reduce Operator Tree: Select Operator @@ -765,7 +716,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) - Reducer 8 + Reducer 6 Execution mode: llap Reduce Operator Tree: Select Operator @@ -841,10 +792,10 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE) - Reducer 8 <- Map 7 (SIMPLE_EDGE) + Reducer 5 <- Map 1 (SIMPLE_EDGE) + Reducer 6 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -860,13 +811,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 @@ -876,13 +820,6 @@ STAGE PLANS: sort order: + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) - Execution mode: llap - LLAP IO: no inputs - Map 7 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE 
Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 @@ -943,7 +880,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 5 Execution mode: llap Reduce Operator Tree: Select Operator @@ -956,7 +893,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) - Reducer 8 + Reducer 6 Execution mode: llap Reduce Operator Tree: Select Operator @@ -1032,10 +969,10 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE) - Reducer 8 <- Map 7 (SIMPLE_EDGE) + Reducer 5 <- Map 1 (SIMPLE_EDGE) + Reducer 6 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1051,13 +988,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 @@ -1067,13 +997,6 @@ STAGE PLANS: sort order: + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) - Execution mode: llap - LLAP IO: no inputs - Map 7 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 
500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 @@ -1134,7 +1057,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 5 Execution mode: llap Reduce Operator Tree: Select Operator @@ -1147,7 +1070,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) - Reducer 8 + Reducer 6 Execution mode: llap Reduce Operator Tree: Select Operator @@ -1223,10 +1146,10 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE) - Reducer 8 <- Map 7 (SIMPLE_EDGE) + Reducer 5 <- Map 1 (SIMPLE_EDGE) + Reducer 6 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1242,13 +1165,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 @@ -1258,13 +1174,6 @@ STAGE PLANS: sort order: + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) - Execution mode: llap - LLAP IO: no inputs - Map 7 
- Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 @@ -1325,7 +1234,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 5 Execution mode: llap Reduce Operator Tree: Select Operator @@ -1338,7 +1247,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) - Reducer 8 + Reducer 6 Execution mode: llap Reduce Operator Tree: Select Operator diff --git ql/src/test/results/clientpositive/perf/query28.q.out ql/src/test/results/clientpositive/perf/query28.q.out index f7c5225..78129cf 100644 --- ql/src/test/results/clientpositive/perf/query28.q.out +++ ql/src/test/results/clientpositive/perf/query28.q.out @@ -104,13 +104,13 @@ POSTHOOK: type: QUERY Plan optimized by CBO. 
Vertex dependency in root stage -Reducer 11 <- Map 10 (SIMPLE_EDGE) -Reducer 13 <- Map 12 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) -Reducer 3 <- Reducer 11 (CUSTOM_SIMPLE_EDGE), Reducer 13 (CUSTOM_SIMPLE_EDGE), Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE), Reducer 9 (CUSTOM_SIMPLE_EDGE) -Reducer 5 <- Map 4 (SIMPLE_EDGE) -Reducer 7 <- Map 6 (SIMPLE_EDGE) -Reducer 9 <- Map 8 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE), Reducer 8 (CUSTOM_SIMPLE_EDGE) +Reducer 4 <- Map 1 (SIMPLE_EDGE) +Reducer 5 <- Map 1 (SIMPLE_EDGE) +Reducer 6 <- Map 1 (SIMPLE_EDGE) +Reducer 7 <- Map 1 (SIMPLE_EDGE) +Reducer 8 <- Map 1 (SIMPLE_EDGE) Stage-0 Fetch Operator @@ -124,34 +124,6 @@ Stage-0 Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] Merge Join Operator [MERGEJOIN_58] (rows=1 width=2497) Conds:(Inner),(Inner),(Inner),(Inner),(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] - <-Reducer 11 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_46] - Group By Operator [GBY_33] (rows=1 width=416) - Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)","count(VALUE._col1)","count(DISTINCT KEY._col0:0._col0)"] - <-Map 10 [SIMPLE_EDGE] - SHUFFLE [RS_32] - Group By Operator [GBY_31] (rows=21333171 width=88) - Output:["_col0","_col1","_col2","_col3"],aggregations:["avg(ss_list_price)","count(ss_list_price)","count(DISTINCT ss_list_price)"],keys:ss_list_price - Select Operator [SEL_30] (rows=21333171 width=88) - Output:["ss_list_price"] - Filter Operator [FIL_56] (rows=21333171 width=88) - predicate:(ss_quantity BETWEEN 11 AND 15 and (ss_list_price BETWEEN 66 AND 76 
or ss_coupon_amt BETWEEN 920 AND 1920 or ss_wholesale_cost BETWEEN 4 AND 24)) - TableScan [TS_28] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_quantity","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] - <-Reducer 13 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_47] - Group By Operator [GBY_40] (rows=1 width=416) - Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)","count(VALUE._col1)","count(DISTINCT KEY._col0:0._col0)"] - <-Map 12 [SIMPLE_EDGE] - SHUFFLE [RS_39] - Group By Operator [GBY_38] (rows=21333171 width=88) - Output:["_col0","_col1","_col2","_col3"],aggregations:["avg(ss_list_price)","count(ss_list_price)","count(DISTINCT ss_list_price)"],keys:ss_list_price - Select Operator [SEL_37] (rows=21333171 width=88) - Output:["ss_list_price"] - Filter Operator [FIL_57] (rows=21333171 width=88) - predicate:(ss_quantity BETWEEN 6 AND 10 and (ss_list_price BETWEEN 91 AND 101 or ss_coupon_amt BETWEEN 1430 AND 2430 or ss_wholesale_cost BETWEEN 32 AND 52)) - TableScan [TS_35] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_quantity","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] <-Reducer 2 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_42] Group By Operator [GBY_5] (rows=1 width=416) @@ -166,11 +138,11 @@ Stage-0 predicate:(ss_quantity BETWEEN 0 AND 5 and (ss_list_price BETWEEN 11 AND 21 or ss_coupon_amt BETWEEN 460 AND 1460 or ss_wholesale_cost BETWEEN 14 AND 34)) TableScan [TS_0] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_quantity","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] - <-Reducer 5 [CUSTOM_SIMPLE_EDGE] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_43] Group By Operator [GBY_12] (rows=1 width=416) Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)","count(VALUE._col1)","count(DISTINCT KEY._col0:0._col0)"] - <-Map 4 [SIMPLE_EDGE] + <-Map 1 
[SIMPLE_EDGE] SHUFFLE [RS_11] Group By Operator [GBY_10] (rows=21333171 width=88) Output:["_col0","_col1","_col2","_col3"],aggregations:["avg(ss_list_price)","count(ss_list_price)","count(DISTINCT ss_list_price)"],keys:ss_list_price @@ -178,13 +150,12 @@ Stage-0 Output:["ss_list_price"] Filter Operator [FIL_53] (rows=21333171 width=88) predicate:(ss_quantity BETWEEN 26 AND 30 and (ss_list_price BETWEEN 28 AND 38 or ss_coupon_amt BETWEEN 2513 AND 3513 or ss_wholesale_cost BETWEEN 42 AND 62)) - TableScan [TS_7] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_quantity","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] - <-Reducer 7 [CUSTOM_SIMPLE_EDGE] + Please refer to the previous TableScan [TS_0] + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_44] Group By Operator [GBY_19] (rows=1 width=416) Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)","count(VALUE._col1)","count(DISTINCT KEY._col0:0._col0)"] - <-Map 6 [SIMPLE_EDGE] + <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_18] Group By Operator [GBY_17] (rows=21333171 width=88) Output:["_col0","_col1","_col2","_col3"],aggregations:["avg(ss_list_price)","count(ss_list_price)","count(DISTINCT ss_list_price)"],keys:ss_list_price @@ -192,13 +163,12 @@ Stage-0 Output:["ss_list_price"] Filter Operator [FIL_54] (rows=21333171 width=88) predicate:(ss_quantity BETWEEN 21 AND 25 and (ss_list_price BETWEEN 135 AND 145 or ss_coupon_amt BETWEEN 14180 AND 15180 or ss_wholesale_cost BETWEEN 38 AND 58)) - TableScan [TS_14] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_quantity","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] - <-Reducer 9 [CUSTOM_SIMPLE_EDGE] + Please refer to the previous TableScan [TS_0] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_45] Group By Operator [GBY_26] (rows=1 width=416) Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)","count(VALUE._col1)","count(DISTINCT 
KEY._col0:0._col0)"] - <-Map 8 [SIMPLE_EDGE] + <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_25] Group By Operator [GBY_24] (rows=21333171 width=88) Output:["_col0","_col1","_col2","_col3"],aggregations:["avg(ss_list_price)","count(ss_list_price)","count(DISTINCT ss_list_price)"],keys:ss_list_price @@ -206,6 +176,31 @@ Stage-0 Output:["ss_list_price"] Filter Operator [FIL_55] (rows=21333171 width=88) predicate:(ss_quantity BETWEEN 16 AND 20 and (ss_list_price BETWEEN 142 AND 152 or ss_coupon_amt BETWEEN 3054 AND 4054 or ss_wholesale_cost BETWEEN 80 AND 100)) - TableScan [TS_21] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_quantity","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] + Please refer to the previous TableScan [TS_0] + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_46] + Group By Operator [GBY_33] (rows=1 width=416) + Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)","count(VALUE._col1)","count(DISTINCT KEY._col0:0._col0)"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_32] + Group By Operator [GBY_31] (rows=21333171 width=88) + Output:["_col0","_col1","_col2","_col3"],aggregations:["avg(ss_list_price)","count(ss_list_price)","count(DISTINCT ss_list_price)"],keys:ss_list_price + Select Operator [SEL_30] (rows=21333171 width=88) + Output:["ss_list_price"] + Filter Operator [FIL_56] (rows=21333171 width=88) + predicate:(ss_quantity BETWEEN 11 AND 15 and (ss_list_price BETWEEN 66 AND 76 or ss_coupon_amt BETWEEN 920 AND 1920 or ss_wholesale_cost BETWEEN 4 AND 24)) + Please refer to the previous TableScan [TS_0] + <-Reducer 8 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_47] + Group By Operator [GBY_40] (rows=1 width=416) + Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)","count(VALUE._col1)","count(DISTINCT KEY._col0:0._col0)"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_39] + Group By Operator [GBY_38] (rows=21333171 width=88) + 
Output:["_col0","_col1","_col2","_col3"],aggregations:["avg(ss_list_price)","count(ss_list_price)","count(DISTINCT ss_list_price)"],keys:ss_list_price + Select Operator [SEL_37] (rows=21333171 width=88) + Output:["ss_list_price"] + Filter Operator [FIL_57] (rows=21333171 width=88) + predicate:(ss_quantity BETWEEN 6 AND 10 and (ss_list_price BETWEEN 91 AND 101 or ss_coupon_amt BETWEEN 1430 AND 2430 or ss_wholesale_cost BETWEEN 32 AND 52)) + Please refer to the previous TableScan [TS_0] diff --git ql/src/test/results/clientpositive/perf/query88.q.out ql/src/test/results/clientpositive/perf/query88.q.out index fcb4042..f7af4ef 100644 --- ql/src/test/results/clientpositive/perf/query88.q.out +++ ql/src/test/results/clientpositive/perf/query88.q.out @@ -186,39 +186,39 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 11 <- Map 10 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) -Reducer 12 <- Map 16 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) -Reducer 13 <- Map 17 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 10 <- Reducer 9 (CUSTOM_SIMPLE_EDGE) +Reducer 11 <- Map 1 (SIMPLE_EDGE), Map 35 (SIMPLE_EDGE) +Reducer 12 <- Map 36 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 13 <- Map 37 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) Reducer 14 <- Reducer 13 (CUSTOM_SIMPLE_EDGE) -Reducer 19 <- Map 18 (SIMPLE_EDGE), Map 23 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 20 <- Map 24 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) -Reducer 21 <- Map 25 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 15 <- Map 1 (SIMPLE_EDGE), Map 35 (SIMPLE_EDGE) +Reducer 16 <- Map 36 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) +Reducer 17 <- Map 37 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) +Reducer 18 <- Reducer 17 (CUSTOM_SIMPLE_EDGE) +Reducer 19 <- Map 1 (SIMPLE_EDGE), Map 35 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 35 (SIMPLE_EDGE) +Reducer 20 <- Map 36 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Reducer 21 <- Map 37 (SIMPLE_EDGE), Reducer 20 
(SIMPLE_EDGE) Reducer 22 <- Reducer 21 (CUSTOM_SIMPLE_EDGE) -Reducer 27 <- Map 26 (SIMPLE_EDGE), Map 31 (SIMPLE_EDGE) -Reducer 28 <- Map 32 (SIMPLE_EDGE), Reducer 27 (SIMPLE_EDGE) -Reducer 29 <- Map 33 (SIMPLE_EDGE), Reducer 28 (SIMPLE_EDGE) -Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 23 <- Map 1 (SIMPLE_EDGE), Map 35 (SIMPLE_EDGE) +Reducer 24 <- Map 36 (SIMPLE_EDGE), Reducer 23 (SIMPLE_EDGE) +Reducer 25 <- Map 37 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) +Reducer 26 <- Reducer 25 (CUSTOM_SIMPLE_EDGE) +Reducer 27 <- Map 1 (SIMPLE_EDGE), Map 35 (SIMPLE_EDGE) +Reducer 28 <- Map 36 (SIMPLE_EDGE), Reducer 27 (SIMPLE_EDGE) +Reducer 29 <- Map 37 (SIMPLE_EDGE), Reducer 28 (SIMPLE_EDGE) +Reducer 3 <- Map 36 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 30 <- Reducer 29 (CUSTOM_SIMPLE_EDGE) -Reducer 35 <- Map 34 (SIMPLE_EDGE), Map 39 (SIMPLE_EDGE) -Reducer 36 <- Map 40 (SIMPLE_EDGE), Reducer 35 (SIMPLE_EDGE) -Reducer 37 <- Map 41 (SIMPLE_EDGE), Reducer 36 (SIMPLE_EDGE) -Reducer 38 <- Reducer 37 (CUSTOM_SIMPLE_EDGE) -Reducer 4 <- Map 9 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 43 <- Map 42 (SIMPLE_EDGE), Map 47 (SIMPLE_EDGE) -Reducer 44 <- Map 48 (SIMPLE_EDGE), Reducer 43 (SIMPLE_EDGE) -Reducer 45 <- Map 49 (SIMPLE_EDGE), Reducer 44 (SIMPLE_EDGE) -Reducer 46 <- Reducer 45 (CUSTOM_SIMPLE_EDGE) +Reducer 31 <- Map 1 (SIMPLE_EDGE), Map 35 (SIMPLE_EDGE) +Reducer 32 <- Map 36 (SIMPLE_EDGE), Reducer 31 (SIMPLE_EDGE) +Reducer 33 <- Map 37 (SIMPLE_EDGE), Reducer 32 (SIMPLE_EDGE) +Reducer 34 <- Reducer 33 (CUSTOM_SIMPLE_EDGE) +Reducer 4 <- Map 37 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) -Reducer 51 <- Map 50 (SIMPLE_EDGE), Map 55 (SIMPLE_EDGE) -Reducer 52 <- Map 56 (SIMPLE_EDGE), Reducer 51 (SIMPLE_EDGE) -Reducer 53 <- Map 57 (SIMPLE_EDGE), Reducer 52 (SIMPLE_EDGE) -Reducer 54 <- Reducer 53 (CUSTOM_SIMPLE_EDGE) -Reducer 59 <- Map 58 (SIMPLE_EDGE), Map 63 (SIMPLE_EDGE) -Reducer 6 <- Reducer 14 (CUSTOM_SIMPLE_EDGE), 
Reducer 22 (CUSTOM_SIMPLE_EDGE), Reducer 30 (CUSTOM_SIMPLE_EDGE), Reducer 38 (CUSTOM_SIMPLE_EDGE), Reducer 46 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE), Reducer 54 (CUSTOM_SIMPLE_EDGE), Reducer 62 (CUSTOM_SIMPLE_EDGE) -Reducer 60 <- Map 64 (SIMPLE_EDGE), Reducer 59 (SIMPLE_EDGE) -Reducer 61 <- Map 65 (SIMPLE_EDGE), Reducer 60 (SIMPLE_EDGE) -Reducer 62 <- Reducer 61 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Reducer 10 (CUSTOM_SIMPLE_EDGE), Reducer 14 (CUSTOM_SIMPLE_EDGE), Reducer 18 (CUSTOM_SIMPLE_EDGE), Reducer 22 (CUSTOM_SIMPLE_EDGE), Reducer 26 (CUSTOM_SIMPLE_EDGE), Reducer 30 (CUSTOM_SIMPLE_EDGE), Reducer 34 (CUSTOM_SIMPLE_EDGE), Reducer 5 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Map 1 (SIMPLE_EDGE), Map 35 (SIMPLE_EDGE) +Reducer 8 <- Map 36 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) +Reducer 9 <- Map 37 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) Stage-0 Fetch Operator @@ -230,452 +230,424 @@ Stage-0 Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] Merge Join Operator [MERGEJOIN_347] (rows=1 width=65) Conds:(Inner),(Inner),(Inner),(Inner),(Inner),(Inner),(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - <-Reducer 14 [CUSTOM_SIMPLE_EDGE] + <-Reducer 10 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_209] Group By Operator [GBY_50] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 13 [CUSTOM_SIMPLE_EDGE] + <-Reducer 9 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_49] Group By Operator [GBY_48] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] Merge Join Operator [MERGEJOIN_328] (rows=766650239 width=88) Conds:RS_44._col2=RS_45._col0(Inner) - <-Map 17 [SIMPLE_EDGE] + <-Map 37 [SIMPLE_EDGE] SHUFFLE [RS_45] PartitionCols:_col0 Select Operator [SEL_37] (rows=852 width=1910) Output:["_col0"] Filter Operator [FIL_298] (rows=852 width=1910) predicate:((s_store_name = 'ese') and s_store_sk is not null) - TableScan [TS_35] (rows=1704 width=1910) + TableScan [TS_9] (rows=1704 
width=1910) default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name"] - <-Reducer 12 [SIMPLE_EDGE] + <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_44] PartitionCols:_col2 Merge Join Operator [MERGEJOIN_327] (rows=696954748 width=88) Conds:RS_41._col1=RS_42._col0(Inner),Output:["_col2"] - <-Map 16 [SIMPLE_EDGE] + <-Map 36 [SIMPLE_EDGE] SHUFFLE [RS_42] PartitionCols:_col0 Select Operator [SEL_34] (rows=3600 width=107) Output:["_col0"] Filter Operator [FIL_297] (rows=3600 width=107) predicate:((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) - TableScan [TS_32] (rows=7200 width=107) + TableScan [TS_6] (rows=7200 width=107) default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_dep_count","hd_vehicle_count"] - <-Reducer 11 [SIMPLE_EDGE] + <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_41] PartitionCols:_col1 Merge Join Operator [MERGEJOIN_326] (rows=633595212 width=88) Conds:RS_38._col0=RS_39._col0(Inner),Output:["_col1","_col2"] - <-Map 10 [SIMPLE_EDGE] + <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_38] PartitionCols:_col0 Select Operator [SEL_28] (rows=575995635 width=88) Output:["_col0","_col1","_col2"] Filter Operator [FIL_295] (rows=575995635 width=88) predicate:(ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) - TableScan [TS_26] (rows=575995635 width=88) + TableScan [TS_0] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] - <-Map 15 [SIMPLE_EDGE] + <-Map 35 [SIMPLE_EDGE] SHUFFLE [RS_39] PartitionCols:_col0 Select Operator [SEL_31] (rows=14400 width=471) Output:["_col0"] Filter Operator [FIL_296] (rows=14400 width=471) predicate:((t_hour = 12) and (t_minute < 30) and t_time_sk is not null) - TableScan [TS_29] (rows=86400 width=471) + TableScan [TS_3] (rows=86400 width=471) 
default@time_dim,time_dim,Tbl:COMPLETE,Col:NONE,Output:["t_time_sk","t_hour","t_minute"] - <-Reducer 22 [CUSTOM_SIMPLE_EDGE] + <-Reducer 14 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_210] Group By Operator [GBY_76] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 21 [CUSTOM_SIMPLE_EDGE] + <-Reducer 13 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_75] Group By Operator [GBY_74] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] Merge Join Operator [MERGEJOIN_331] (rows=766650239 width=88) Conds:RS_70._col2=RS_71._col0(Inner) - <-Map 25 [SIMPLE_EDGE] + <-Map 37 [SIMPLE_EDGE] SHUFFLE [RS_71] PartitionCols:_col0 Select Operator [SEL_63] (rows=852 width=1910) Output:["_col0"] Filter Operator [FIL_302] (rows=852 width=1910) predicate:((s_store_name = 'ese') and s_store_sk is not null) - TableScan [TS_61] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name"] - <-Reducer 20 [SIMPLE_EDGE] + Please refer to the previous TableScan [TS_9] + <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_70] PartitionCols:_col2 Merge Join Operator [MERGEJOIN_330] (rows=696954748 width=88) Conds:RS_67._col1=RS_68._col0(Inner),Output:["_col2"] - <-Map 24 [SIMPLE_EDGE] + <-Map 36 [SIMPLE_EDGE] SHUFFLE [RS_68] PartitionCols:_col0 Select Operator [SEL_60] (rows=3600 width=107) Output:["_col0"] Filter Operator [FIL_301] (rows=3600 width=107) predicate:((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) - TableScan [TS_58] (rows=7200 width=107) - default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_dep_count","hd_vehicle_count"] - <-Reducer 19 [SIMPLE_EDGE] + Please refer to the previous TableScan [TS_6] + <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_67] PartitionCols:_col1 Merge Join Operator [MERGEJOIN_329] (rows=633595212 width=88) 
Conds:RS_64._col0=RS_65._col0(Inner),Output:["_col1","_col2"] - <-Map 18 [SIMPLE_EDGE] + <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_64] PartitionCols:_col0 Select Operator [SEL_54] (rows=575995635 width=88) Output:["_col0","_col1","_col2"] Filter Operator [FIL_299] (rows=575995635 width=88) predicate:(ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) - TableScan [TS_52] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] - <-Map 23 [SIMPLE_EDGE] + Please refer to the previous TableScan [TS_0] + <-Map 35 [SIMPLE_EDGE] SHUFFLE [RS_65] PartitionCols:_col0 Select Operator [SEL_57] (rows=14400 width=471) Output:["_col0"] Filter Operator [FIL_300] (rows=14400 width=471) predicate:((t_hour = 11) and (t_minute >= 30) and t_time_sk is not null) - TableScan [TS_55] (rows=86400 width=471) - default@time_dim,time_dim,Tbl:COMPLETE,Col:NONE,Output:["t_time_sk","t_hour","t_minute"] - <-Reducer 30 [CUSTOM_SIMPLE_EDGE] + Please refer to the previous TableScan [TS_3] + <-Reducer 18 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_211] Group By Operator [GBY_102] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 29 [CUSTOM_SIMPLE_EDGE] + <-Reducer 17 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_101] Group By Operator [GBY_100] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] Merge Join Operator [MERGEJOIN_334] (rows=766650239 width=88) Conds:RS_96._col2=RS_97._col0(Inner) - <-Map 33 [SIMPLE_EDGE] + <-Map 37 [SIMPLE_EDGE] SHUFFLE [RS_97] PartitionCols:_col0 Select Operator [SEL_89] (rows=852 width=1910) Output:["_col0"] Filter Operator [FIL_306] (rows=852 width=1910) predicate:((s_store_name = 'ese') and s_store_sk is not null) - TableScan [TS_87] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name"] - <-Reducer 28 [SIMPLE_EDGE] + Please refer to the previous TableScan [TS_9] + 
<-Reducer 16 [SIMPLE_EDGE] SHUFFLE [RS_96] PartitionCols:_col2 Merge Join Operator [MERGEJOIN_333] (rows=696954748 width=88) Conds:RS_93._col1=RS_94._col0(Inner),Output:["_col2"] - <-Map 32 [SIMPLE_EDGE] + <-Map 36 [SIMPLE_EDGE] SHUFFLE [RS_94] PartitionCols:_col0 Select Operator [SEL_86] (rows=3600 width=107) Output:["_col0"] Filter Operator [FIL_305] (rows=3600 width=107) predicate:((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) - TableScan [TS_84] (rows=7200 width=107) - default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_dep_count","hd_vehicle_count"] - <-Reducer 27 [SIMPLE_EDGE] + Please refer to the previous TableScan [TS_6] + <-Reducer 15 [SIMPLE_EDGE] SHUFFLE [RS_93] PartitionCols:_col1 Merge Join Operator [MERGEJOIN_332] (rows=633595212 width=88) Conds:RS_90._col0=RS_91._col0(Inner),Output:["_col1","_col2"] - <-Map 26 [SIMPLE_EDGE] + <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_90] PartitionCols:_col0 Select Operator [SEL_80] (rows=575995635 width=88) Output:["_col0","_col1","_col2"] Filter Operator [FIL_303] (rows=575995635 width=88) predicate:(ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) - TableScan [TS_78] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] - <-Map 31 [SIMPLE_EDGE] + Please refer to the previous TableScan [TS_0] + <-Map 35 [SIMPLE_EDGE] SHUFFLE [RS_91] PartitionCols:_col0 Select Operator [SEL_83] (rows=14400 width=471) Output:["_col0"] Filter Operator [FIL_304] (rows=14400 width=471) predicate:((t_hour = 11) and (t_minute < 30) and t_time_sk is not null) - TableScan [TS_81] (rows=86400 width=471) - default@time_dim,time_dim,Tbl:COMPLETE,Col:NONE,Output:["t_time_sk","t_hour","t_minute"] - <-Reducer 38 [CUSTOM_SIMPLE_EDGE] + Please refer to the 
previous TableScan [TS_3] + <-Reducer 22 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_212] Group By Operator [GBY_128] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 37 [CUSTOM_SIMPLE_EDGE] + <-Reducer 21 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_127] Group By Operator [GBY_126] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] Merge Join Operator [MERGEJOIN_337] (rows=766650239 width=88) Conds:RS_122._col2=RS_123._col0(Inner) - <-Map 41 [SIMPLE_EDGE] + <-Map 37 [SIMPLE_EDGE] SHUFFLE [RS_123] PartitionCols:_col0 Select Operator [SEL_115] (rows=852 width=1910) Output:["_col0"] Filter Operator [FIL_310] (rows=852 width=1910) predicate:((s_store_name = 'ese') and s_store_sk is not null) - TableScan [TS_113] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name"] - <-Reducer 36 [SIMPLE_EDGE] + Please refer to the previous TableScan [TS_9] + <-Reducer 20 [SIMPLE_EDGE] SHUFFLE [RS_122] PartitionCols:_col2 Merge Join Operator [MERGEJOIN_336] (rows=696954748 width=88) Conds:RS_119._col1=RS_120._col0(Inner),Output:["_col2"] - <-Map 40 [SIMPLE_EDGE] + <-Map 36 [SIMPLE_EDGE] SHUFFLE [RS_120] PartitionCols:_col0 Select Operator [SEL_112] (rows=3600 width=107) Output:["_col0"] Filter Operator [FIL_309] (rows=3600 width=107) predicate:((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) - TableScan [TS_110] (rows=7200 width=107) - default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_dep_count","hd_vehicle_count"] - <-Reducer 35 [SIMPLE_EDGE] + Please refer to the previous TableScan [TS_6] + <-Reducer 19 [SIMPLE_EDGE] SHUFFLE [RS_119] PartitionCols:_col1 Merge Join Operator [MERGEJOIN_335] (rows=633595212 width=88) Conds:RS_116._col0=RS_117._col0(Inner),Output:["_col1","_col2"] - <-Map 34 [SIMPLE_EDGE] + 
<-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_116] PartitionCols:_col0 Select Operator [SEL_106] (rows=575995635 width=88) Output:["_col0","_col1","_col2"] Filter Operator [FIL_307] (rows=575995635 width=88) predicate:(ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) - TableScan [TS_104] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] - <-Map 39 [SIMPLE_EDGE] + Please refer to the previous TableScan [TS_0] + <-Map 35 [SIMPLE_EDGE] SHUFFLE [RS_117] PartitionCols:_col0 Select Operator [SEL_109] (rows=14400 width=471) Output:["_col0"] Filter Operator [FIL_308] (rows=14400 width=471) predicate:((t_hour = 10) and (t_minute >= 30) and t_time_sk is not null) - TableScan [TS_107] (rows=86400 width=471) - default@time_dim,time_dim,Tbl:COMPLETE,Col:NONE,Output:["t_time_sk","t_hour","t_minute"] - <-Reducer 46 [CUSTOM_SIMPLE_EDGE] + Please refer to the previous TableScan [TS_3] + <-Reducer 26 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_213] Group By Operator [GBY_154] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 45 [CUSTOM_SIMPLE_EDGE] + <-Reducer 25 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_153] Group By Operator [GBY_152] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] Merge Join Operator [MERGEJOIN_340] (rows=766650239 width=88) Conds:RS_148._col2=RS_149._col0(Inner) - <-Map 49 [SIMPLE_EDGE] + <-Map 37 [SIMPLE_EDGE] SHUFFLE [RS_149] PartitionCols:_col0 Select Operator [SEL_141] (rows=852 width=1910) Output:["_col0"] Filter Operator [FIL_314] (rows=852 width=1910) predicate:((s_store_name = 'ese') and s_store_sk is not null) - TableScan [TS_139] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name"] - <-Reducer 44 [SIMPLE_EDGE] + Please refer to the previous TableScan [TS_9] + <-Reducer 24 [SIMPLE_EDGE] SHUFFLE [RS_148] PartitionCols:_col2 Merge Join Operator 
[MERGEJOIN_339] (rows=696954748 width=88) Conds:RS_145._col1=RS_146._col0(Inner),Output:["_col2"] - <-Map 48 [SIMPLE_EDGE] + <-Map 36 [SIMPLE_EDGE] SHUFFLE [RS_146] PartitionCols:_col0 Select Operator [SEL_138] (rows=3600 width=107) Output:["_col0"] Filter Operator [FIL_313] (rows=3600 width=107) predicate:((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) - TableScan [TS_136] (rows=7200 width=107) - default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_dep_count","hd_vehicle_count"] - <-Reducer 43 [SIMPLE_EDGE] + Please refer to the previous TableScan [TS_6] + <-Reducer 23 [SIMPLE_EDGE] SHUFFLE [RS_145] PartitionCols:_col1 Merge Join Operator [MERGEJOIN_338] (rows=633595212 width=88) Conds:RS_142._col0=RS_143._col0(Inner),Output:["_col1","_col2"] - <-Map 42 [SIMPLE_EDGE] + <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_142] PartitionCols:_col0 Select Operator [SEL_132] (rows=575995635 width=88) Output:["_col0","_col1","_col2"] Filter Operator [FIL_311] (rows=575995635 width=88) predicate:(ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) - TableScan [TS_130] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] - <-Map 47 [SIMPLE_EDGE] + Please refer to the previous TableScan [TS_0] + <-Map 35 [SIMPLE_EDGE] SHUFFLE [RS_143] PartitionCols:_col0 Select Operator [SEL_135] (rows=14400 width=471) Output:["_col0"] Filter Operator [FIL_312] (rows=14400 width=471) predicate:((t_hour = 10) and (t_minute < 30) and t_time_sk is not null) - TableScan [TS_133] (rows=86400 width=471) - default@time_dim,time_dim,Tbl:COMPLETE,Col:NONE,Output:["t_time_sk","t_hour","t_minute"] - <-Reducer 5 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_208] - Group By Operator [GBY_24] (rows=1 width=8) - 
Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 4 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_23] - Group By Operator [GBY_22] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Merge Join Operator [MERGEJOIN_325] (rows=766650239 width=88) - Conds:RS_18._col2=RS_19._col0(Inner) - <-Map 9 [SIMPLE_EDGE] - SHUFFLE [RS_19] - PartitionCols:_col0 - Select Operator [SEL_11] (rows=852 width=1910) - Output:["_col0"] - Filter Operator [FIL_294] (rows=852 width=1910) - predicate:((s_store_name = 'ese') and s_store_sk is not null) - TableScan [TS_9] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_18] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_324] (rows=696954748 width=88) - Conds:RS_15._col1=RS_16._col0(Inner),Output:["_col2"] - <-Map 8 [SIMPLE_EDGE] - SHUFFLE [RS_16] - PartitionCols:_col0 - Select Operator [SEL_8] (rows=3600 width=107) - Output:["_col0"] - Filter Operator [FIL_293] (rows=3600 width=107) - predicate:((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) - TableScan [TS_6] (rows=7200 width=107) - default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_dep_count","hd_vehicle_count"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_15] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_323] (rows=633595212 width=88) - Conds:RS_12._col0=RS_13._col0(Inner),Output:["_col1","_col2"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_12] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=575995635 width=88) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_291] (rows=575995635 width=88) - predicate:(ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) - TableScan [TS_0] (rows=575995635 width=88) - 
default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] - <-Map 7 [SIMPLE_EDGE] - SHUFFLE [RS_13] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=14400 width=471) - Output:["_col0"] - Filter Operator [FIL_292] (rows=14400 width=471) - predicate:((t_hour = 8) and (t_minute >= 30) and t_time_sk is not null) - TableScan [TS_3] (rows=86400 width=471) - default@time_dim,time_dim,Tbl:COMPLETE,Col:NONE,Output:["t_time_sk","t_hour","t_minute"] - <-Reducer 54 [CUSTOM_SIMPLE_EDGE] + Please refer to the previous TableScan [TS_3] + <-Reducer 30 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_214] Group By Operator [GBY_180] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 53 [CUSTOM_SIMPLE_EDGE] + <-Reducer 29 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_179] Group By Operator [GBY_178] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] Merge Join Operator [MERGEJOIN_343] (rows=766650239 width=88) Conds:RS_174._col2=RS_175._col0(Inner) - <-Map 57 [SIMPLE_EDGE] + <-Map 37 [SIMPLE_EDGE] SHUFFLE [RS_175] PartitionCols:_col0 Select Operator [SEL_167] (rows=852 width=1910) Output:["_col0"] Filter Operator [FIL_318] (rows=852 width=1910) predicate:((s_store_name = 'ese') and s_store_sk is not null) - TableScan [TS_165] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name"] - <-Reducer 52 [SIMPLE_EDGE] + Please refer to the previous TableScan [TS_9] + <-Reducer 28 [SIMPLE_EDGE] SHUFFLE [RS_174] PartitionCols:_col2 Merge Join Operator [MERGEJOIN_342] (rows=696954748 width=88) Conds:RS_171._col1=RS_172._col0(Inner),Output:["_col2"] - <-Map 56 [SIMPLE_EDGE] + <-Map 36 [SIMPLE_EDGE] SHUFFLE [RS_172] PartitionCols:_col0 Select Operator [SEL_164] (rows=3600 width=107) Output:["_col0"] Filter Operator [FIL_317] (rows=3600 width=107) predicate:((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 
2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) - TableScan [TS_162] (rows=7200 width=107) - default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_dep_count","hd_vehicle_count"] - <-Reducer 51 [SIMPLE_EDGE] + Please refer to the previous TableScan [TS_6] + <-Reducer 27 [SIMPLE_EDGE] SHUFFLE [RS_171] PartitionCols:_col1 Merge Join Operator [MERGEJOIN_341] (rows=633595212 width=88) Conds:RS_168._col0=RS_169._col0(Inner),Output:["_col1","_col2"] - <-Map 50 [SIMPLE_EDGE] + <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_168] PartitionCols:_col0 Select Operator [SEL_158] (rows=575995635 width=88) Output:["_col0","_col1","_col2"] Filter Operator [FIL_315] (rows=575995635 width=88) predicate:(ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) - TableScan [TS_156] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] - <-Map 55 [SIMPLE_EDGE] + Please refer to the previous TableScan [TS_0] + <-Map 35 [SIMPLE_EDGE] SHUFFLE [RS_169] PartitionCols:_col0 Select Operator [SEL_161] (rows=14400 width=471) Output:["_col0"] Filter Operator [FIL_316] (rows=14400 width=471) predicate:((t_hour = 9) and (t_minute >= 30) and t_time_sk is not null) - TableScan [TS_159] (rows=86400 width=471) - default@time_dim,time_dim,Tbl:COMPLETE,Col:NONE,Output:["t_time_sk","t_hour","t_minute"] - <-Reducer 62 [CUSTOM_SIMPLE_EDGE] + Please refer to the previous TableScan [TS_3] + <-Reducer 34 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_215] Group By Operator [GBY_206] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Reducer 61 [CUSTOM_SIMPLE_EDGE] + <-Reducer 33 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_205] Group By Operator [GBY_204] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] Merge Join Operator [MERGEJOIN_346] (rows=766650239 width=88) 
Conds:RS_200._col2=RS_201._col0(Inner) - <-Map 65 [SIMPLE_EDGE] + <-Map 37 [SIMPLE_EDGE] SHUFFLE [RS_201] PartitionCols:_col0 Select Operator [SEL_193] (rows=852 width=1910) Output:["_col0"] Filter Operator [FIL_322] (rows=852 width=1910) predicate:((s_store_name = 'ese') and s_store_sk is not null) - TableScan [TS_191] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name"] - <-Reducer 60 [SIMPLE_EDGE] + Please refer to the previous TableScan [TS_9] + <-Reducer 32 [SIMPLE_EDGE] SHUFFLE [RS_200] PartitionCols:_col2 Merge Join Operator [MERGEJOIN_345] (rows=696954748 width=88) Conds:RS_197._col1=RS_198._col0(Inner),Output:["_col2"] - <-Map 64 [SIMPLE_EDGE] + <-Map 36 [SIMPLE_EDGE] SHUFFLE [RS_198] PartitionCols:_col0 Select Operator [SEL_190] (rows=3600 width=107) Output:["_col0"] Filter Operator [FIL_321] (rows=3600 width=107) predicate:((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) - TableScan [TS_188] (rows=7200 width=107) - default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_dep_count","hd_vehicle_count"] - <-Reducer 59 [SIMPLE_EDGE] + Please refer to the previous TableScan [TS_6] + <-Reducer 31 [SIMPLE_EDGE] SHUFFLE [RS_197] PartitionCols:_col1 Merge Join Operator [MERGEJOIN_344] (rows=633595212 width=88) Conds:RS_194._col0=RS_195._col0(Inner),Output:["_col1","_col2"] - <-Map 58 [SIMPLE_EDGE] + <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_194] PartitionCols:_col0 Select Operator [SEL_184] (rows=575995635 width=88) Output:["_col0","_col1","_col2"] Filter Operator [FIL_319] (rows=575995635 width=88) predicate:(ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) - TableScan [TS_182] (rows=575995635 width=88) - 
default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_time_sk","ss_hdemo_sk","ss_store_sk"] - <-Map 63 [SIMPLE_EDGE] + Please refer to the previous TableScan [TS_0] + <-Map 35 [SIMPLE_EDGE] SHUFFLE [RS_195] PartitionCols:_col0 Select Operator [SEL_187] (rows=14400 width=471) Output:["_col0"] Filter Operator [FIL_320] (rows=14400 width=471) predicate:((t_hour = 9) and (t_minute < 30) and t_time_sk is not null) - TableScan [TS_185] (rows=86400 width=471) - default@time_dim,time_dim,Tbl:COMPLETE,Col:NONE,Output:["t_time_sk","t_hour","t_minute"] + Please refer to the previous TableScan [TS_3] + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_208] + Group By Operator [GBY_24] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_23] + Group By Operator [GBY_22] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Merge Join Operator [MERGEJOIN_325] (rows=766650239 width=88) + Conds:RS_18._col2=RS_19._col0(Inner) + <-Map 37 [SIMPLE_EDGE] + SHUFFLE [RS_19] + PartitionCols:_col0 + Select Operator [SEL_11] (rows=852 width=1910) + Output:["_col0"] + Filter Operator [FIL_294] (rows=852 width=1910) + predicate:((s_store_name = 'ese') and s_store_sk is not null) + Please refer to the previous TableScan [TS_9] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_324] (rows=696954748 width=88) + Conds:RS_15._col1=RS_16._col0(Inner),Output:["_col2"] + <-Map 36 [SIMPLE_EDGE] + SHUFFLE [RS_16] + PartitionCols:_col0 + Select Operator [SEL_8] (rows=3600 width=107) + Output:["_col0"] + Filter Operator [FIL_293] (rows=3600 width=107) + predicate:((((hd_dep_count = 3) and (hd_vehicle_count <= 5)) or ((hd_dep_count = 0) and (hd_vehicle_count <= 2)) or ((hd_dep_count = 1) and (hd_vehicle_count <= 3))) and hd_demo_sk is not null) + Please refer to the previous TableScan [TS_6] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_15] 
+ PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_323] (rows=633595212 width=88) + Conds:RS_12._col0=RS_13._col0(Inner),Output:["_col1","_col2"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=575995635 width=88) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_291] (rows=575995635 width=88) + predicate:(ss_hdemo_sk is not null and ss_sold_time_sk is not null and ss_store_sk is not null) + Please refer to the previous TableScan [TS_0] + <-Map 35 [SIMPLE_EDGE] + SHUFFLE [RS_13] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=14400 width=471) + Output:["_col0"] + Filter Operator [FIL_292] (rows=14400 width=471) + predicate:((t_hour = 8) and (t_minute >= 30) and t_time_sk is not null) + Please refer to the previous TableScan [TS_3]