diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index e7724f9084..ed941f5841 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2383,7 +2383,7 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "If the skew information is correctly stored in the metadata, hive.optimize.skewjoin.compiletime\n" + "would change the query plan to take care of it, and hive.optimize.skewjoin will be a no-op."), - HIVE_OPTIMIZE_TOPNKEY("hive.optimize.topnkey", true, "Whether to enable top n key optimizer."), + HIVE_OPTIMIZE_TOPNKEY("hive.optimize.topnkey", false, "Whether to enable top n key optimizer."), HIVE_SHARED_WORK_OPTIMIZATION("hive.optimize.shared.work", true, "Whether to enable shared work optimizer. The optimizer finds scan operator over the same table\n" + diff --git data/conf/perf-reg/tez/hive-site.xml data/conf/perf-reg/tez/hive-site.xml index ab945f5f95..e5e0877dc9 100644 --- data/conf/perf-reg/tez/hive-site.xml +++ data/conf/perf-reg/tez/hive-site.xml @@ -317,4 +317,9 @@ query + + hive.optimize.topnkey + true + + diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/topnkey/CommonKeyPrefix.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/topnkey/CommonKeyPrefix.java new file mode 100644 index 0000000000..7552fa9d80 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/topnkey/CommonKeyPrefix.java @@ -0,0 +1,184 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.topnkey; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.stream.Stream; + +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.GroupByDesc; +import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; +import org.apache.hadoop.hive.ql.plan.TopNKeyDesc; + +/** + * Holds result of a common key prefix of two operators. + * Provides factory methods for mapping TopNKey operator keys to GroupBy and ReduceSink operator keys. + */ +public class CommonKeyPrefix { + + /** + * Factory method to map a {@link org.apache.hadoop.hive.ql.exec.TopNKeyOperator}'s and a {@link org.apache.hadoop.hive.ql.exec.GroupByOperator}'s keys. + * This method calls the {@link #map(List, String, String, List, Map, String, String)} method to do the mapping. + * Since the {@link GroupByDesc} does not contain any ordering information, the {@link TopNKeyDesc} ordering is passed for + * both operators. + * @param topNKeyDesc {@link TopNKeyDesc} which contains the {@link org.apache.hadoop.hive.ql.exec.TopNKeyOperator} keys. + * @param groupByDesc {@link GroupByDesc} which contains the {@link org.apache.hadoop.hive.ql.exec.GroupByOperator} keys. + * @return {@link CommonKeyPrefix} object containing the common key prefix of the mapped operators. 
+ */ + public static CommonKeyPrefix map(TopNKeyDesc topNKeyDesc, GroupByDesc groupByDesc) { + return map(topNKeyDesc.getKeyColumns(), topNKeyDesc.getColumnSortOrder(), topNKeyDesc.getNullOrder(), + groupByDesc.getKeys(), groupByDesc.getColumnExprMap(), + topNKeyDesc.getColumnSortOrder(), topNKeyDesc.getNullOrder()); + } + + /** + * Factory method to map a {@link org.apache.hadoop.hive.ql.exec.TopNKeyOperator}'s and a {@link org.apache.hadoop.hive.ql.exec.ReduceSinkOperator}'s keys. + * This method calls the {@link #map(List, String, String, List, Map, String, String)} method to do the mapping. + * @param topNKeyDesc {@link TopNKeyDesc} which contains the {@link org.apache.hadoop.hive.ql.exec.TopNKeyOperator} keys. + * @param reduceSinkDesc {@link ReduceSinkDesc} which contains the {@link org.apache.hadoop.hive.ql.exec.ReduceSinkOperator} keys. + * @return {@link CommonKeyPrefix} object containing the common key prefix of the mapped operators. + */ + public static CommonKeyPrefix map(TopNKeyDesc topNKeyDesc, ReduceSinkDesc reduceSinkDesc) { + return map(topNKeyDesc.getKeyColumns(), topNKeyDesc.getColumnSortOrder(), topNKeyDesc.getNullOrder(), + reduceSinkDesc.getKeyCols(), reduceSinkDesc.getColumnExprMap(), + reduceSinkDesc.getOrder(), reduceSinkDesc.getNullOrder()); + } + + /** + * General factory method to map two operator keys. + * Two keys are considered to be equal + * - if parent operator's {@param parentColExprMap} has an entry with the operator key column name + * - and that entry value has the same index as the operator key column index. 
+ * - and both key columns have the same ordering + * - and both key columns have the same null ordering + * + * Ex.: op1: a, b, c, d + * op2: a, b, e + * result: a, b + * + * opKeys: Column[_col0], Column[_col1], Column[_col2], Column[_col3] + * parentKeys: Column[KEY._col0], Column[KEY._col1], Column[KEY._col4] + * parentColExprMap: {_col0 -> Column[KEY._col0]}, {_col1 -> Column[KEY._col1]}, {_col4 -> Column[KEY._col4]} + * + * Column ordering and null ordering are given by a string where each character represents one column order/null order. + * Ex.: a ASC NULLS FIRST, b DESC NULLS LAST, c ASC NULLS LAST -> order="+-+", null order="azz" + * + * When {@code parentColExprMap} is null, this method falls back to {@link #map(List, String, String, List, String, String)}. + * + * @param opKeys {@link List} of {@link ExprNodeDesc} containing the operator's key columns + * @param opOrder operator's key column ordering in {@link String} format + * @param opNullOrder operator's key column null ordering in {@link String} format + * @param parentKeys {@link List} of {@link ExprNodeDesc} containing the parent operator's key columns + * @param parentColExprMap {@link Map} of {@link String} -> {@link ExprNodeDesc} containing the parent operator's key column name to {@link ExprNodeDesc} mapping + * @param parentOrder parent operator's key column ordering in {@link String} format + * @param parentNullOrder parent operator's key column null ordering in {@link String} format + * @return {@link CommonKeyPrefix} object containing the common key prefix of the mapped operators. 
+ */ + public static CommonKeyPrefix map( + List opKeys, String opOrder, String opNullOrder, + List parentKeys, Map parentColExprMap, + String parentOrder, String parentNullOrder) { + + if (parentColExprMap == null) { + return map(opKeys, opOrder, opNullOrder, parentKeys, parentOrder, parentNullOrder); + } + + CommonKeyPrefix commonPrefix = new CommonKeyPrefix(); + int size = Stream.of(opKeys.size(), opOrder.length(), opNullOrder.length(), + parentKeys.size(), parentColExprMap.size(), parentOrder.length(), parentNullOrder.length()) + .min(Integer::compareTo) + .orElse(0); + + for (int i = 0; i < size; ++i) { + ExprNodeDesc column = opKeys.get(i); + String columnName = column.getExprString(); + ExprNodeDesc parentKey = parentKeys.get(i); + if (parentKey != null && parentKey.isSame(parentColExprMap.get(columnName)) && + opOrder.charAt(i) == parentOrder.charAt(i) && + opNullOrder.charAt(i) == parentNullOrder.charAt(i)) { + commonPrefix.add(parentKey, opOrder.charAt(i), opNullOrder.charAt(i)); + } else { + return commonPrefix; + } + } + return commonPrefix; + } + + // General factory method to map two operator keys. Operator's and parent operator's {@link ExprNodeDesc}s are compared using the + // {@link ExprNodeDesc.isSame} method. 
+ public static CommonKeyPrefix map( + List opKeys, String opOrder, String opNullOrder, + List parentKeys, + String parentOrder, String parentNullOrder) { + + CommonKeyPrefix commonPrefix = new CommonKeyPrefix(); + int size = Stream.of(opKeys.size(), opOrder.length(), opNullOrder.length(), + parentKeys.size(), parentOrder.length(), parentNullOrder.length()) + .min(Integer::compareTo) + .orElse(0); + + for (int i = 0; i < size; ++i) { + ExprNodeDesc opKey = opKeys.get(i); + ExprNodeDesc parentKey = parentKeys.get(i); + if (opKey != null && opKey.isSame(parentKey) && + opOrder.charAt(i) == parentOrder.charAt(i) && + opNullOrder.charAt(i) == parentNullOrder.charAt(i)) { + commonPrefix.add(parentKey, opOrder.charAt(i), opNullOrder.charAt(i)); + } else { + return commonPrefix; + } + } + return commonPrefix; + } + + private List mappedColumns = new ArrayList<>(); + private StringBuilder mappedOrder = new StringBuilder(); + private StringBuilder mappedNullOrder = new StringBuilder(); + + private CommonKeyPrefix() { + } + + public void add(ExprNodeDesc column, char order, char nullOrder) { + mappedColumns.add(column); + mappedOrder.append(order); + mappedNullOrder.append(nullOrder); + } + + public boolean isEmpty() { + return mappedColumns.isEmpty(); + } + + public List getMappedColumns() { + return mappedColumns; + } + + public String getMappedOrder() { + return mappedOrder.toString(); + } + + public String getMappedNullOrder() { + return mappedNullOrder.toString(); + } + + public int size() { + return mappedColumns.size(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/TopNKeyProcessor.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/topnkey/TopNKeyProcessor.java similarity index 53% rename from ql/src/java/org/apache/hadoop/hive/ql/optimizer/TopNKeyProcessor.java rename to ql/src/java/org/apache/hadoop/hive/ql/optimizer/topnkey/TopNKeyProcessor.java index 0d6cf3c755..738e4c3b44 100644 --- 
ql/src/java/org/apache/hadoop/hive/ql/optimizer/TopNKeyProcessor.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/topnkey/TopNKeyProcessor.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -15,9 +15,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.hadoop.hive.ql.optimizer; +package org.apache.hadoop.hive.ql.optimizer.topnkey; -import org.apache.hadoop.hive.ql.exec.GroupByOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; @@ -27,9 +26,6 @@ import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; import org.apache.hadoop.hive.ql.parse.SemanticException; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; -import org.apache.hadoop.hive.ql.plan.GroupByDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; import org.apache.hadoop.hive.ql.plan.TopNKeyDesc; @@ -37,13 +33,13 @@ import org.slf4j.LoggerFactory; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.Stack; /** - * TopNKeyProcessor is a processor for TopNKeyOperator. A TopNKeyOperator will be placed between - * a GroupByOperator and its following ReduceSinkOperator. If there already is a TopNKeyOperator, - * then it will be skipped. + * TopNKeyProcessor is a processor for TopNKeyOperator. + * A TopNKeyOperator will be placed before any ReduceSinkOperator which has a topN property >= 0. 
*/ public class TopNKeyProcessor implements NodeProcessor { private static final Logger LOG = LoggerFactory.getLogger(TopNKeyProcessor.class); @@ -59,12 +55,8 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, ReduceSinkOperator reduceSinkOperator = (ReduceSinkOperator) nd; ReduceSinkDesc reduceSinkDesc = reduceSinkOperator.getConf(); - // Get GroupByOperator - GroupByOperator groupByOperator = (GroupByOperator) reduceSinkOperator.getParentOperators().get(0); - GroupByDesc groupByDesc = groupByOperator.getConf(); - // Check whether the reduce sink operator contains top n - if (!reduceSinkDesc.isOrdering() || reduceSinkDesc.getTopN() < 0) { + if (reduceSinkDesc.getTopN() < 0 || !reduceSinkDesc.isOrdering()) { return null; } @@ -74,42 +66,35 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, return null; } - // Check whether the group by operator is in hash mode - if (groupByDesc.getMode() != GroupByDesc.Mode.HASH) { - return null; - } - - // Check whether the group by operator has distinct aggregations - if (groupByDesc.isDistinct()) { - return null; - } - - // Check whether RS keys are same as GBY keys - List groupByKeyColumns = groupByDesc.getKeys(); - List mappedColumns = new ArrayList<>(); - for (ExprNodeDesc columns : reduceSinkDesc.getKeyCols()) { - mappedColumns.add(groupByDesc.getColumnExprMap().get(columns.getExprString())); - } - if (!ExprNodeDescUtils.isSame(mappedColumns, groupByKeyColumns)) { - return null; - } - // Check whether there already is a top n key operator - Operator parentOperator = groupByOperator.getParentOperators().get(0); + Operator parentOperator = reduceSinkOperator.getParentOperators().get(0); if (parentOperator instanceof TopNKeyOperator) { return null; } - // Insert a new top n key operator between the group by operator and its parent - TopNKeyDesc topNKeyDesc = new TopNKeyDesc( - reduceSinkDesc.getTopN(), reduceSinkDesc.getOrder(), reduceSinkDesc.getNullOrder(), groupByKeyColumns); - 
Operator newOperator = OperatorFactory.getAndMakeChild( - groupByOperator.getCompilationOpContext(), (OperatorDesc) topNKeyDesc, - new RowSchema(groupByOperator.getSchema()), groupByOperator.getParentOperators()); - newOperator.getChildOperators().add(groupByOperator); - groupByOperator.getParentOperators().add(newOperator); - parentOperator.removeChild(groupByOperator); + TopNKeyDesc topNKeyDesc = new TopNKeyDesc(reduceSinkDesc.getTopN(), reduceSinkDesc.getOrder(), + reduceSinkDesc.getNullOrder(), reduceSinkDesc.getKeyCols()); + copyDown(reduceSinkOperator, topNKeyDesc); return null; } + + static TopNKeyOperator copyDown(Operator child, OperatorDesc operatorDesc) { + final List> parents = child.getParentOperators(); + + final Operator newOperator = + OperatorFactory.getAndMakeChild( + child.getCompilationOpContext(), operatorDesc, + new RowSchema(parents.get(0).getSchema()), child.getParentOperators()); + + newOperator.getChildOperators().add(child); + + for (Operator parent : parents) { + parent.removeChild(child); + } + child.getParentOperators().clear(); + child.getParentOperators().add(newOperator); + + return (TopNKeyOperator) newOperator; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/topnkey/TopNKeyPushdownProcessor.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/topnkey/TopNKeyPushdownProcessor.java new file mode 100644 index 0000000000..9ba5369389 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/topnkey/TopNKeyPushdownProcessor.java @@ -0,0 +1,364 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.topnkey; + +import org.apache.hadoop.hive.ql.exec.CommonJoinOperator; +import org.apache.hadoop.hive.ql.exec.GroupByOperator; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; +import org.apache.hadoop.hive.ql.exec.SelectOperator; +import org.apache.hadoop.hive.ql.exec.TopNKeyOperator; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.lib.NodeProcessor; +import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.GroupByDesc; +import org.apache.hadoop.hive.ql.plan.JoinCondDesc; +import org.apache.hadoop.hive.ql.plan.JoinDesc; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; +import org.apache.hadoop.hive.ql.plan.TopNKeyDesc; +import org.apache.hadoop.hive.ql.plan.api.OperatorType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Stack; + +import static org.apache.hadoop.hive.ql.optimizer.topnkey.TopNKeyProcessor.copyDown; + +/** + * Implementation of TopNKey operator pushdown. 
+ */ +public class TopNKeyPushdownProcessor implements NodeProcessor { + private static final Logger LOG = LoggerFactory.getLogger(TopNKeyPushdownProcessor.class); + + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + pushdown((TopNKeyOperator) nd); + return null; + } + + private void pushdown(TopNKeyOperator topNKey) throws SemanticException { + + final Operator parent = + topNKey.getParentOperators().get(0); + + switch (parent.getType()) { + case SELECT: + pushdownThroughSelect(topNKey); + break; + + case FORWARD: + pushdownThroughParent(topNKey); + break; + + case GROUPBY: + pushdownThroughGroupBy(topNKey); + break; + + case REDUCESINK: + pushdownThroughReduceSink(topNKey); + break; + + case MERGEJOIN: + case JOIN: + pushDownThroughJoin(topNKey); + break; + + case TOPNKEY: + pushdownThroughTopNKey(topNKey); + break; + + default: + break; + } + } + + /** + * Push through Project if expression(s) in TopNKey can be mapped to expression(s) based on Project input. 
+ * + * @param topNKey TopNKey operator to push + * @throws SemanticException when removeChildAndAdoptItsChildren was not successful in the method pushdown + */ + private void pushdownThroughSelect(TopNKeyOperator topNKey) throws SemanticException { + + final SelectOperator select = (SelectOperator) topNKey.getParentOperators().get(0); + final TopNKeyDesc topNKeyDesc = topNKey.getConf(); + + final List mappedColumns = mapColumns(topNKeyDesc.getKeyColumns(), select.getColumnExprMap()); + if (mappedColumns.size() != topNKeyDesc.getKeyColumns().size()) { + return; + } + + LOG.debug("Pushing {} through {}", topNKey.getName(), select.getName()); + topNKeyDesc.setKeyColumns(mappedColumns); + moveDown(topNKey); + pushdown(topNKey); + } + + private static List mapColumns(List columns, Map + colExprMap) { + + if (colExprMap == null) { + return new ArrayList<>(0); + } + final List mappedColumns = new ArrayList<>(); + for (ExprNodeDesc column : columns) { + final String columnName = column.getExprString(); + if (colExprMap.containsKey(columnName)) { + mappedColumns.add(colExprMap.get(columnName)); + } + } + return mappedColumns; + } + + private void pushdownThroughParent(TopNKeyOperator topNKey) throws SemanticException { + Operator parent = topNKey.getParentOperators().get(0); + LOG.debug("Pushing {} through {}", topNKey.getName(), parent.getName()); + moveDown(topNKey); + pushdown(topNKey); + } + + /** + * Push through GroupBy. No grouping sets. If TopNKey expression is same as GroupBy expression, + * we can push it and remove it from above GroupBy. If expression in TopNKey shared common + * prefix with GroupBy, TopNKey could be pushed through GroupBy using that prefix and kept above + * it. 
+ * + * @param topNKey TopNKey operator to push + * @throws SemanticException when removeChildAndAdoptItsChildren was not successful + */ + private void pushdownThroughGroupBy(TopNKeyOperator topNKey) throws SemanticException { + final GroupByOperator groupBy = (GroupByOperator) topNKey.getParentOperators().get(0); + final GroupByDesc groupByDesc = groupBy.getConf(); + final TopNKeyDesc topNKeyDesc = topNKey.getConf(); + + // Check grouping sets + if (groupByDesc.isGroupingSetsPresent()) { + return; + } + + CommonKeyPrefix commonKeyPrefix = CommonKeyPrefix.map(topNKeyDesc, groupByDesc); + if (commonKeyPrefix.isEmpty()) { + return; + } + + LOG.debug("Pushing a copy of {} through {}", topNKey.getName(), groupBy.getName()); + final TopNKeyDesc newTopNKeyDesc = new TopNKeyDesc(topNKeyDesc.getTopN(), commonKeyPrefix.getMappedOrder(), + commonKeyPrefix.getMappedNullOrder(), commonKeyPrefix.getMappedColumns()); + pushdown(copyDown(groupBy, newTopNKeyDesc)); + + if (topNKeyDesc.getKeyColumns().size() == commonKeyPrefix.size()) { + LOG.debug("Removing {} above {}", topNKey.getName(), groupBy.getName()); + groupBy.removeChildAndAdoptItsChildren(topNKey); + } + } + + /** + * Push through ReduceSink. If TopNKey expression is same as ReduceSink expression and order is + * the same, we can push it and remove it from above ReduceSink. If expression in TopNKey shared + * common prefix with ReduceSink including same order, TopNKey could be pushed through + * ReduceSink using that prefix and kept above it. 
+ * + * @param topNKey TopNKey operator to push + * @throws SemanticException when removeChildAndAdoptItsChildren was not successful + */ + private void pushdownThroughReduceSink(TopNKeyOperator topNKey) throws SemanticException { + ReduceSinkOperator reduceSink = (ReduceSinkOperator) topNKey.getParentOperators().get(0); + final ReduceSinkDesc reduceSinkDesc = reduceSink.getConf(); + final TopNKeyDesc topNKeyDesc = topNKey.getConf(); + + CommonKeyPrefix commonKeyPrefix = CommonKeyPrefix.map(topNKeyDesc, reduceSinkDesc); + if (commonKeyPrefix.isEmpty()) { + return; + } + + LOG.debug("Pushing a copy of {} through {}", topNKey.getName(), reduceSink.getName()); + final TopNKeyDesc newTopNKeyDesc = new TopNKeyDesc(topNKeyDesc.getTopN(), + commonKeyPrefix.getMappedOrder(), commonKeyPrefix.getMappedNullOrder(), commonKeyPrefix.getMappedColumns()); + pushdown(copyDown(reduceSink, newTopNKeyDesc)); + + if (topNKeyDesc.getKeyColumns().size() == commonKeyPrefix.size()) { + LOG.debug("Removing {} above {}", topNKey.getName(), reduceSink.getName()); + reduceSink.removeChildAndAdoptItsChildren(topNKey); + } + } + + // Only push down through Left Outer Join is supported. + // Right and Full Outer Join support will be added in a follow up patch. + private void pushDownThroughJoin(TopNKeyOperator topNKey) + throws SemanticException { + CommonJoinOperator parent = + (CommonJoinOperator) topNKey.getParentOperators().get(0); + JoinCondDesc[] joinConds = parent.getConf().getConds(); + JoinCondDesc firstJoinCond = joinConds[0]; + for (JoinCondDesc joinCond : joinConds) { + if (!firstJoinCond.equals(joinCond)) { + return; + } + } + if (firstJoinCond.getType() == JoinDesc.LEFT_OUTER_JOIN) { + pushdownThroughLeftOuterJoin(topNKey); + } + } + + /** + * Push through LOJ. If TopNKey expression refers fully to expressions from left input, push + * with rewriting of expressions and remove from top of LOJ. 
If TopNKey expression has a prefix + * that refers to expressions from left input, push with rewriting of those expressions and keep + * on top of LOJ. + * + * @param topNKey TopNKey operator to push + * @throws SemanticException when removeChildAndAdoptItsChildren was not successful + */ + private void pushdownThroughLeftOuterJoin(TopNKeyOperator topNKey) throws SemanticException { + final TopNKeyDesc topNKeyDesc = topNKey.getConf(); + final CommonJoinOperator join = + (CommonJoinOperator) topNKey.getParentOperators().get(0); + final List> joinInputs = join.getParentOperators(); + final ReduceSinkOperator reduceSinkOperator = (ReduceSinkOperator) joinInputs.get(0); + final ReduceSinkDesc reduceSinkDesc = reduceSinkOperator.getConf(); + + CommonKeyPrefix commonKeyPrefix = CommonKeyPrefix.map( + mapUntilColumnEquals(topNKeyDesc.getKeyColumns(), join.getColumnExprMap()), + topNKeyDesc.getColumnSortOrder(), + topNKeyDesc.getNullOrder(), + reduceSinkDesc.getKeyCols(), + reduceSinkDesc.getColumnExprMap(), + reduceSinkDesc.getOrder(), + reduceSinkDesc.getNullOrder()); + if (commonKeyPrefix.isEmpty()) { + return; + } + + LOG.debug("Pushing a copy of {} through {} and {}", + topNKey.getName(), join.getName(), reduceSinkOperator.getName()); + final TopNKeyDesc newTopNKeyDesc = new TopNKeyDesc(topNKeyDesc.getTopN(), + commonKeyPrefix.getMappedOrder(), commonKeyPrefix.getMappedNullOrder(), commonKeyPrefix.getMappedColumns()); + pushdown(copyDown(reduceSinkOperator, newTopNKeyDesc)); + + if (topNKeyDesc.getKeyColumns().size() == commonKeyPrefix.size()) { + LOG.debug("Removing {} above {}", topNKey.getName(), join.getName()); + join.removeChildAndAdoptItsChildren(topNKey); + } + } + + private List mapUntilColumnEquals(List columns, Map colExprMap) { + if (colExprMap == null) { + return new ArrayList<>(0); + } + final List mappedColumns = new ArrayList<>(); + for (ExprNodeDesc column : columns) { + final String columnName = column.getExprString(); + if 
(colExprMap.containsKey(columnName)) { + mappedColumns.add(colExprMap.get(columnName)); + } else { + return mappedColumns; + } + } + return mappedColumns; + } + + /** + * Push through another Top N Key operator. + * If the TNK operators are the same one of them will be removed. See {@link TopNKeyDesc#isSame} + * else If expression in {@param topnKey} is a common prefix in it's parent TNK op and topN property is same then {@param topnkey} + * could be pushed through parent. + * If the Top N Key operator can not be pushed through this method tries to remove one of them: + * - if topN property is the same and the keys of one of the operators are subset of the other then the operator can be removed + * - if the keys are the same operator with higher topN value can be removed + * @param topNKey TopNKey operator to push + * @throws SemanticException when removeChildAndAdoptItsChildren was not successful + */ + private void pushdownThroughTopNKey(TopNKeyOperator topNKey) throws SemanticException { + TopNKeyOperator parent = (TopNKeyOperator) topNKey.getParentOperators().get(0); + if (hasSameTopNKeyDesc(parent, topNKey.getConf())) { + LOG.debug("Removing {} above same operator: {}", topNKey.getName(), parent.getName()); + parent.removeChildAndAdoptItsChildren(topNKey); + return; + } + + TopNKeyDesc topNKeyDesc = topNKey.getConf(); + TopNKeyDesc parentTopNKeyDesc = parent.getConf(); + CommonKeyPrefix commonKeyPrefix = CommonKeyPrefix.map( + topNKeyDesc.getKeyColumns(), topNKeyDesc.getColumnSortOrder(), topNKeyDesc.getNullOrder(), + parentTopNKeyDesc.getKeyColumns(), parentTopNKeyDesc.getColumnSortOrder(), + parentTopNKeyDesc.getNullOrder()); + + if (topNKeyDesc.getTopN() == parentTopNKeyDesc.getTopN()) { + if (topNKeyDesc.getKeyColumns().size() == commonKeyPrefix.size()) { + // TNK keys are subset of the parent TNK keys + pushdownThroughParent(topNKey); + if (topNKey.getChildOperators().get(0).getType() == OperatorType.TOPNKEY) { + LOG.debug("Removing {} since child {} 
supersedes it", parent.getName(), topNKey.getName()); + topNKey.getParentOperators().get(0).removeChildAndAdoptItsChildren(topNKey); + } + } else if (parentTopNKeyDesc.getKeyColumns().size() == commonKeyPrefix.size()) { + // parent TNK keys are subset of TNK keys + LOG.debug("Removing parent of {} since it supersedes", topNKey.getName()); + parent.getParentOperators().get(0).removeChildAndAdoptItsChildren(parent); + } + } else if (topNKeyDesc.getKeyColumns().size() == commonKeyPrefix.size() && + parentTopNKeyDesc.getKeyColumns().size() == commonKeyPrefix.size()) { + if (topNKeyDesc.getTopN() > parentTopNKeyDesc.getTopN()) { + LOG.debug("Removing {}. Parent {} has same keys but lower topN {} > {}", + topNKey.getName(), parent.getName(), topNKeyDesc.getTopN(), parentTopNKeyDesc.getTopN()); + topNKey.getParentOperators().get(0).removeChildAndAdoptItsChildren(topNKey); + } else { + LOG.debug("Removing parent {}. {} has same keys but lower topN {} < {}", + parent.getName(), topNKey.getName(), topNKeyDesc.getTopN(), parentTopNKeyDesc.getTopN()); + parent.getParentOperators().get(0).removeChildAndAdoptItsChildren(parent); + } + } + } + + private static boolean hasSameTopNKeyDesc(Operator operator, TopNKeyDesc desc) { + if (!(operator instanceof TopNKeyOperator)) { + return false; + } + + final TopNKeyOperator topNKey = (TopNKeyOperator) operator; + final TopNKeyDesc opDesc = topNKey.getConf(); + return opDesc.isSame(desc); + } + + private static void moveDown(TopNKeyOperator topNKey) throws SemanticException { + + assert topNKey.getNumParent() == 1; + final Operator parent = topNKey.getParentOperators().get(0); + final List> grandParents = parent.getParentOperators(); + parent.removeChildAndAdoptItsChildren(topNKey); + for (Operator grandParent : grandParents) { + grandParent.replaceChild(parent, topNKey); + } + topNKey.getParentOperators().clear(); + topNKey.getParentOperators().addAll(grandParents); + + topNKey.getChildOperators().clear(); + 
topNKey.getChildOperators().add(parent); + + parent.getParentOperators().clear(); + parent.getParentOperators().add(topNKey); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java index bf58bd8bb8..e11c72e6c6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java @@ -19,7 +19,6 @@ import com.google.common.collect.ListMultimap; import com.google.common.collect.Sets; -import java.io.Serializable; import java.util.ArrayList; import java.util.Collection; import java.util.Comparator; @@ -63,6 +62,7 @@ import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.TerminalOperator; import org.apache.hadoop.hive.ql.exec.TezDummyStoreOperator; +import org.apache.hadoop.hive.ql.exec.TopNKeyOperator; import org.apache.hadoop.hive.ql.exec.UnionOperator; import org.apache.hadoop.hive.ql.exec.tez.TezTask; import org.apache.hadoop.hive.ql.hooks.ReadEntity; @@ -93,9 +93,10 @@ import org.apache.hadoop.hive.ql.optimizer.SetReducerParallelism; import org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer; import org.apache.hadoop.hive.ql.optimizer.SortedDynPartitionOptimizer; -import org.apache.hadoop.hive.ql.optimizer.TopNKeyProcessor; +import org.apache.hadoop.hive.ql.optimizer.topnkey.TopNKeyProcessor; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; import org.apache.hadoop.hive.ql.optimizer.correlation.ReduceSinkDeDuplication; +import org.apache.hadoop.hive.ql.optimizer.topnkey.TopNKeyPushdownProcessor; import org.apache.hadoop.hive.ql.optimizer.correlation.ReduceSinkJoinDeDuplication; import org.apache.hadoop.hive.ql.optimizer.metainfo.annotation.AnnotateWithOpTraits; import org.apache.hadoop.hive.ql.optimizer.physical.AnnotateRunTimeStatsOptimizer; @@ -1287,9 +1288,12 @@ private static void runTopNKeyOptimization(OptimizeTezProcContext procCtx) Map opRules = new 
LinkedHashMap(); opRules.put( - new RuleRegExp("Top n key optimization", GroupByOperator.getOperatorName() + "%" + - ReduceSinkOperator.getOperatorName() + "%"), + new RuleRegExp("Top n key optimization", ReduceSinkOperator.getOperatorName() + "%"), new TopNKeyProcessor()); + opRules.put( + new RuleRegExp("Top n key pushdown", TopNKeyOperator.getOperatorName() + "%"), + new TopNKeyPushdownProcessor()); + // The dispatcher fires the processor corresponding to the closest matching // rule and passes the context along diff --git ql/src/test/org/apache/hadoop/hive/ql/optimizer/topnkey/TestCommonKeyPrefix.java ql/src/test/org/apache/hadoop/hive/ql/optimizer/topnkey/TestCommonKeyPrefix.java new file mode 100644 index 0000000000..8661c14283 --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/optimizer/topnkey/TestCommonKeyPrefix.java @@ -0,0 +1,195 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer.topnkey; + +import static java.util.Arrays.asList; +import static java.util.Collections.singletonList; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.core.Is.is; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; + +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.junit.Test; + +/** + * Tests for CommonKeyPrefix. + */ +public class TestCommonKeyPrefix { + @Test + public void testmapWhenNoKeysExists() { + // when + CommonKeyPrefix commonPrefix = CommonKeyPrefix.map( + new ArrayList<>(0), "", "", new ArrayList<>(0), new HashMap<>(0), "", ""); + // then + assertThat(commonPrefix.isEmpty(), is(true)); + assertThat(commonPrefix.size(), is(0)); + assertThat(commonPrefix.getMappedOrder(), is("")); + assertThat(commonPrefix.getMappedNullOrder(), is("")); + assertThat(commonPrefix.getMappedColumns().isEmpty(), is(true)); + } + + @Test + public void testmapWhenAllKeysMatch() { + // given + ExprNodeColumnDesc childCol0 = new ExprNodeColumnDesc(); + childCol0.setColumn("_col0"); + ExprNodeColumnDesc childCol1 = new ExprNodeColumnDesc(); + childCol1.setColumn("_col1"); + ExprNodeColumnDesc parentCol0 = new ExprNodeColumnDesc(); + parentCol0.setColumn("KEY._col0"); + ExprNodeColumnDesc parentCol1 = new ExprNodeColumnDesc(); + parentCol1.setColumn("KEY._col1"); + Map exprNodeDescMap = new HashMap<>(); + exprNodeDescMap.put("_col0", parentCol0); + exprNodeDescMap.put("_col1", parentCol1); + + // when + CommonKeyPrefix commonPrefix = CommonKeyPrefix.map( + asList(childCol0, childCol1), "++", "aa", asList(parentCol0, parentCol1), exprNodeDescMap, "++", "aa"); + + // then + assertThat(commonPrefix.isEmpty(), is(false)); + assertThat(commonPrefix.size(), is(2)); + assertThat(commonPrefix.getMappedOrder(), is("++")); + assertThat(commonPrefix.getMappedNullOrder(), is("aa")); + 
assertThat(commonPrefix.getMappedColumns().get(0), is(parentCol0)); + assertThat(commonPrefix.getMappedColumns().get(1), is(parentCol1)); + } + + @Test + public void testmapWhenOnlyFirstKeyMatchFromTwo() { + // given + ExprNodeColumnDesc childCol0 = new ExprNodeColumnDesc(); + childCol0.setColumn("_col0"); + ExprNodeColumnDesc differentChildCol = new ExprNodeColumnDesc(); + differentChildCol.setColumn("_col2"); + ExprNodeColumnDesc parentCol0 = new ExprNodeColumnDesc(); + parentCol0.setColumn("KEY._col0"); + ExprNodeColumnDesc parentCol1 = new ExprNodeColumnDesc(); + parentCol1.setColumn("KEY._col1"); + Map exprNodeDescMap = new HashMap<>(); + exprNodeDescMap.put("_col0", parentCol0); + exprNodeDescMap.put("_col1", parentCol1); + + // when + CommonKeyPrefix commonPrefix = CommonKeyPrefix.map( + asList(childCol0, differentChildCol), "++", "aa", + asList(parentCol0, parentCol1), exprNodeDescMap, "++", "aa"); + + // then + assertThat(commonPrefix.isEmpty(), is(false)); + assertThat(commonPrefix.size(), is(1)); + assertThat(commonPrefix.getMappedOrder(), is("+")); + assertThat(commonPrefix.getMappedColumns().get(0), is(parentCol0)); + } + + @Test + public void testmapWhenAllColumnsMatchButOrderMismatch() { + // given + ExprNodeColumnDesc childCol0 = new ExprNodeColumnDesc(); + childCol0.setColumn("_col0"); + ExprNodeColumnDesc childCol1 = new ExprNodeColumnDesc(); + childCol1.setColumn("_col1"); + ExprNodeColumnDesc parentCol0 = new ExprNodeColumnDesc(); + parentCol0.setColumn("KEY._col0"); + ExprNodeColumnDesc parentCol1 = new ExprNodeColumnDesc(); + parentCol1.setColumn("KEY._col1"); + Map exprNodeDescMap = new HashMap<>(); + exprNodeDescMap.put("_col0", parentCol0); + exprNodeDescMap.put("_col1", parentCol1); + + // when + CommonKeyPrefix commonPrefix = CommonKeyPrefix.map( + asList(childCol0, childCol1), "+-", "aa", asList(parentCol0, parentCol1), exprNodeDescMap, "++", "aa"); + + // then + assertThat(commonPrefix.isEmpty(), is(false)); + 
assertThat(commonPrefix.size(), is(1)); + assertThat(commonPrefix.getMappedOrder(), is("+")); + assertThat(commonPrefix.getMappedNullOrder(), is("a")); + assertThat(commonPrefix.getMappedColumns().get(0), is(parentCol0)); + + // when + commonPrefix = CommonKeyPrefix.map( + asList(childCol0, childCol1), "-+", "aa", asList(parentCol0, parentCol1), exprNodeDescMap, "++", "aa"); + + // then + assertThat(commonPrefix.isEmpty(), is(true)); + } + + @Test + public void testmapWhenAllColumnsMatchButNullOrderMismatch() { + // given + ExprNodeColumnDesc childCol0 = new ExprNodeColumnDesc(); + childCol0.setColumn("_col0"); + ExprNodeColumnDesc childCol1 = new ExprNodeColumnDesc(); + childCol1.setColumn("_col1"); + ExprNodeColumnDesc parentCol0 = new ExprNodeColumnDesc(); + parentCol0.setColumn("KEY._col0"); + ExprNodeColumnDesc parentCol1 = new ExprNodeColumnDesc(); + parentCol1.setColumn("KEY._col1"); + Map exprNodeDescMap = new HashMap<>(); + exprNodeDescMap.put("_col0", parentCol0); + exprNodeDescMap.put("_col1", parentCol1); + + // when + CommonKeyPrefix commonPrefix = CommonKeyPrefix.map( + asList(childCol0, childCol1), "++", "az", asList(parentCol0, parentCol1), exprNodeDescMap, "++", "aa"); + + // then + assertThat(commonPrefix.isEmpty(), is(false)); + assertThat(commonPrefix.size(), is(1)); + assertThat(commonPrefix.getMappedOrder(), is("+")); + assertThat(commonPrefix.getMappedNullOrder(), is("a")); + assertThat(commonPrefix.getMappedColumns().get(0), is(parentCol0)); + + // when + commonPrefix = CommonKeyPrefix.map( + asList(childCol0, childCol1), "++", "za", asList(parentCol0, parentCol1), exprNodeDescMap, "++", "aa"); + + // then + assertThat(commonPrefix.isEmpty(), is(true)); + } + + @Test + public void testmapWhenKeyCountsMismatch() { + // given + ExprNodeColumnDesc childCol0 = new ExprNodeColumnDesc(); + childCol0.setColumn("_col0"); + ExprNodeColumnDesc childCol1 = new ExprNodeColumnDesc(); + childCol1.setColumn("_col1"); + ExprNodeColumnDesc parentCol0 = new 
ExprNodeColumnDesc(); + parentCol0.setColumn("KEY._col0"); + Map exprNodeDescMap = new HashMap<>(); + exprNodeDescMap.put("_col0", parentCol0); + + // when + CommonKeyPrefix commonPrefix = CommonKeyPrefix.map( + asList(childCol0, childCol1), "++", "aa", singletonList(parentCol0), exprNodeDescMap, "++", "aa"); + + // then + assertThat(commonPrefix.isEmpty(), is(false)); + assertThat(commonPrefix.size(), is(1)); + assertThat(commonPrefix.getMappedOrder(), is("+")); + assertThat(commonPrefix.getMappedColumns().get(0), is(parentCol0)); + } +} diff --git ql/src/test/queries/clientpositive/topnkey.q ql/src/test/queries/clientpositive/topnkey.q index 057b6a45ba..6b53d6c6b1 100644 --- ql/src/test/queries/clientpositive/topnkey.q +++ ql/src/test/queries/clientpositive/topnkey.q @@ -1,30 +1,68 @@ --! qt:dataset:src -set hive.mapred.mode=nonstrict; -set hive.vectorized.execution.enabled=false; -set hive.optimize.topnkey=true; - -set hive.optimize.ppd=true; -set hive.ppd.remove.duplicatefilters=true; -set hive.tez.dynamic.partition.pruning=true; -set hive.optimize.metadataonly=false; -set hive.optimize.index.filter=true; -set hive.tez.min.bloom.filter.entries=1; - -set hive.tez.dynamic.partition.pruning=true; -set hive.stats.fetch.column.stats=true; -set hive.cbo.enable=true; - -EXPLAIN EXTENDED +SET hive.mapred.mode=nonstrict; +SET hive.vectorized.execution.enabled=false; +SET hive.optimize.topnkey=true; + +SET hive.optimize.ppd=true; +SET hive.ppd.remove.duplicatefilters=true; +SET hive.tez.dynamic.partition.pruning=true; +SET hive.optimize.metadataonly=false; +SET hive.optimize.index.filter=true; +SET hive.tez.min.bloom.filter.entries=1; + +SET hive.stats.fetch.column.stats=true; +SET hive.cbo.enable=true; + +SET hive.optimize.topnkey=true; +EXPLAIN +SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5; SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5; +SET hive.optimize.topnkey=false; SELECT key, 
SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5; +SET hive.optimize.topnkey=true; +EXPLAIN +SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5; +SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5; + +SET hive.optimize.topnkey=false; +SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5; + +SET hive.optimize.topnkey=true; EXPLAIN -SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5; +SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key NULLS FIRST LIMIT 5; +SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key NULLS FIRST LIMIT 5; -SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5; +SET hive.optimize.topnkey=false; +SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key NULLS FIRST LIMIT 5; + +CREATE TABLE t_test( + a int, + b int, + c int +); + +INSERT INTO t_test VALUES +(5, 2, 3), +(6, 2, 1), +(7, 8, 4), (7, 8, 4), (7, 8, 4), +(5, 1, 2), (5, 1, 2), (5, 1, 2); + +SET hive.optimize.topnkey=true; +EXPLAIN +SELECT a, b FROM t_test ORDER BY a, b LIMIT 3; +SELECT a, b FROM t_test ORDER BY a, b LIMIT 3; + +SET hive.optimize.topnkey=false; +SELECT a, b FROM t_test ORDER BY a, b LIMIT 3; + +SET hive.optimize.topnkey=true; +EXPLAIN +SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3; +SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3; -explain vectorization detail -SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5; +SET 
hive.optimize.topnkey=false; +SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3; -SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5; +DROP TABLE t_test; diff --git ql/src/test/queries/clientpositive/vector_topnkey.q ql/src/test/queries/clientpositive/vector_topnkey.q index 85c5880cd6..3412aba9db 100644 --- ql/src/test/queries/clientpositive/vector_topnkey.q +++ ql/src/test/queries/clientpositive/vector_topnkey.q @@ -43,4 +43,4 @@ SELECT cint1, cdouble FROM t_test GROUP BY cint1, cdouble ORDER BY cint1, cdoubl SELECT cvarchar, cdouble FROM t_test GROUP BY cvarchar, cdouble ORDER BY cvarchar, cdouble LIMIT 3; SELECT cdecimal1, cdecimal2 FROM t_test GROUP BY cdecimal1, cdecimal2 ORDER BY cdecimal1, cdecimal2 LIMIT 3; -DROP TABLE t_test; \ No newline at end of file +DROP TABLE t_test; diff --git ql/src/test/results/clientpositive/llap/bucket_groupby.q.out ql/src/test/results/clientpositive/llap/bucket_groupby.q.out index 0c051c926b..8d5ad0cedc 100644 --- ql/src/test/results/clientpositive/llap/bucket_groupby.q.out +++ ql/src/test/results/clientpositive/llap/bucket_groupby.q.out @@ -74,27 +74,21 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: key Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + + Group By Operator + aggregations: count() keys: key (type: string) - null sort order: z - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 - Group By Operator - aggregations: count() - keys: key (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition 
columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -217,27 +211,21 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: key Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + + Group By Operator + aggregations: count() keys: key (type: string) - null sort order: z - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 - Group By Operator - aggregations: count() - keys: key (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -334,27 +322,21 @@ STAGE PLANS: 
expressions: length(key) (type: int) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + + Group By Operator + aggregations: count() keys: _col0 (type: int) - null sort order: a - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 - Group By Operator - aggregations: count() - keys: _col0 (type: int) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -430,27 +412,21 @@ STAGE PLANS: expressions: abs(length(key)) (type: int) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + + Group By Operator + aggregations: count() keys: _col0 (type: int) - null sort order: a - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 - Group By Operator - aggregations: count() - keys: _col0 (type: int) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1 + 
Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 250 Data size: 3000 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -527,27 +503,21 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: key Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + + Group By Operator + aggregations: count() keys: key (type: string) - null sort order: z - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 - Group By Operator - aggregations: count() - keys: key (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value 
expressions: _col1 (type: bigint) + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -645,27 +615,21 @@ STAGE PLANS: expressions: value (type: string) outputColumnNames: value Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + + Group By Operator + aggregations: count() keys: value (type: string) - null sort order: z - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 - Group By Operator - aggregations: count() - keys: value (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1281,28 +1245,22 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: key Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + + Group By Operator + aggregations: count() + bucketGroup: true keys: key (type: string) - null sort order: z - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 - 
Group By Operator - aggregations: count() - bucketGroup: true - keys: key (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1400,27 +1358,21 @@ STAGE PLANS: expressions: value (type: string) outputColumnNames: value Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + + Group By Operator + aggregations: count() keys: value (type: string) - null sort order: z - Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 - Group By Operator - aggregations: count() - keys: value (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output 
Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1625,28 +1577,22 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: key Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + + Group By Operator + aggregations: count() + bucketGroup: true keys: key (type: string) - null sort order: z - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 - Group By Operator - aggregations: count() - bucketGroup: true - keys: key (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1744,27 +1690,21 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: key, value Statistics: 
Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: ++ + Group By Operator + aggregations: count() keys: key (type: string), value (type: string) - null sort order: za - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 - Group By Operator - aggregations: count() - keys: key (type: string), value (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2 + minReductionHashAggr: 0.0 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: za + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: za - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col2 (type: bigint) + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs Reducer 2 diff --git ql/src/test/results/clientpositive/llap/check_constraint.q.out ql/src/test/results/clientpositive/llap/check_constraint.q.out index 9f2c9a1cd0..86e195cf9b 100644 --- ql/src/test/results/clientpositive/llap/check_constraint.q.out +++ ql/src/test/results/clientpositive/llap/check_constraint.q.out @@ -1756,27 +1756,21 @@ STAGE PLANS: expressions: key (type: string), value (type: string), UDFToInteger(key) (type: int), CAST( key AS decimal(5,2)) (type: decimal(5,2)) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 500 
Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: ++ + Group By Operator + aggregations: min(_col2), max(_col3) keys: _col0 (type: string), _col1 (type: string) - null sort order: zz - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 - Group By Operator - aggregations: min(_col2), max(_col3) - keys: _col0 (type: string), _col1 (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + minReductionHashAggr: 0.0 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 73500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 250 Data size: 73500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 73500 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col2 (type: int), _col3 (type: decimal(5,2)) + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: int), _col3 (type: decimal(5,2)) Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 diff --git ql/src/test/results/clientpositive/llap/constraints_optimization.q.out ql/src/test/results/clientpositive/llap/constraints_optimization.q.out index b6d210becf..572156f1c9 100644 --- ql/src/test/results/clientpositive/llap/constraints_optimization.q.out +++ ql/src/test/results/clientpositive/llap/constraints_optimization.q.out @@ -330,27 +330,21 @@ STAGE PLANS: expressions: d_datekey (type: bigint), d_id (type: bigint) outputColumnNames: 
d_datekey, d_id Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: ++ + Group By Operator + aggregations: count() keys: d_datekey (type: bigint), d_id (type: bigint) - null sort order: za + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE - top n: 10 - Group By Operator - aggregations: count() - keys: d_datekey (type: bigint), d_id (type: bigint) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: bigint), _col1 (type: bigint) + null sort order: za + sort order: ++ + Map-reduce partition columns: _col0 (type: bigint), _col1 (type: bigint) Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint), _col1 (type: bigint) - null sort order: za - sort order: ++ - Map-reduce partition columns: _col0 (type: bigint), _col1 (type: bigint) - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col2 (type: bigint) + TopN Hash Memory Usage: 0.1 + value expressions: _col2 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 @@ -598,25 +592,19 @@ STAGE PLANS: expressions: d_datekey (type: bigint), d_sellingseason (type: string) outputColumnNames: d_datekey, d_sellingseason Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: ++ + Group By Operator keys: d_datekey (type: bigint), d_sellingseason (type: string) - null sort order: za - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 - Group By Operator - keys: d_datekey (type: bigint), d_sellingseason (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: 
_col0, _col1 + minReductionHashAggr: 0.0 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint), _col1 (type: string) + null sort order: za + sort order: ++ + Map-reduce partition columns: _col0 (type: bigint), _col1 (type: string) Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint), _col1 (type: string) - null sort order: za - sort order: ++ - Map-reduce partition columns: _col0 (type: bigint), _col1 (type: string) - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 diff --git ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out index 9343e078b7..6119f38847 100644 --- ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out +++ ql/src/test/results/clientpositive/llap/enforce_constraint_notnull.q.out @@ -3081,25 +3081,19 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: key, value Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: ++ + Group By Operator keys: key (type: string), value (type: string) - null sort order: za - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 2 - Group By Operator - keys: key (type: string), value (type: string) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1 + minReductionHashAggr: 0.0 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key 
expressions: _col0 (type: string), _col1 (type: string) + null sort order: za + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: za - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 diff --git ql/src/test/results/clientpositive/llap/explainuser_1.q.out ql/src/test/results/clientpositive/llap/explainuser_1.q.out index 283a665a20..379abecd9c 100644 --- ql/src/test/results/clientpositive/llap/explainuser_1.q.out +++ ql/src/test/results/clientpositive/llap/explainuser_1.q.out @@ -1529,21 +1529,19 @@ Stage-0 PartitionCols:_col0, _col1 Group By Operator [GBY_7] (rows=5 width=20) Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col0 - Top N Key Operator [TNK_15] (rows=10 width=101) - keys:_col1, _col0,top n:1 - Select Operator [SEL_5] (rows=10 width=101) - Output:["_col0","_col1"] - Group By Operator [GBY_4] (rows=10 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_3] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_2] (rows=10 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Select Operator [SEL_1] (rows=20 width=88) - Output:["key","c_int","c_float"] - TableScan [TS_0] (rows=20 width=88) - default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + Select Operator [SEL_5] (rows=10 width=101) + Output:["_col0","_col1"] + Group By Operator [GBY_4] (rows=10 
width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 1 [SIMPLE_EDGE] llap + SHUFFLE [RS_3] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_2] (rows=10 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float + Select Operator [SEL_1] (rows=20 width=88) + Output:["key","c_int","c_float"] + TableScan [TS_0] (rows=20 width=88) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select key from(select key from (select key from cbo_t1 limit 5)cbo_t2 limit 5)cbo_t3 limit 5 PREHOOK: type: QUERY @@ -1664,7 +1662,7 @@ Stage-0 Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col4, _col7 Select Operator [SEL_35] (rows=2 width=20) Output:["_col4","_col7"] - Merge Join Operator [MERGEJOIN_72] (rows=2 width=20) + Merge Join Operator [MERGEJOIN_71] (rows=2 width=20) Conds:RS_32._col2=RS_33._col0(Inner),Output:["_col4","_col5","_col7","_col8"],residual filter predicates:{(_col5 or _col8)} <-Map 11 [SIMPLE_EDGE] llap SHUFFLE [RS_33] @@ -1678,7 +1676,7 @@ Stage-0 <-Reducer 4 [SIMPLE_EDGE] llap SHUFFLE [RS_32] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_71] (rows=1 width=105) + Merge Join Operator [MERGEJOIN_70] (rows=1 width=105) Conds:RS_29._col0=RS_30._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"],residual filter predicates:{((_col3 + _col1) >= 0)} <-Reducer 10 [SIMPLE_EDGE] llap SHUFFLE [RS_30] @@ -1702,12 +1700,10 @@ Stage-0 PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_16] (rows=3 width=101) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Top N Key Operator [TNK_55] (rows=6 width=93) - keys:key, c_int, c_float,top n:5 - Filter Operator [FIL_53] (rows=6 width=93) - predicate:(((c_int > 0) or (c_float >= 0.0)) and ((c_int + 1) >= 0)) - TableScan [TS_13] (rows=20 width=88) - 
default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + Filter Operator [FIL_53] (rows=6 width=93) + predicate:(((c_int > 0) or (c_float >= 0.0)) and ((c_int + 1) >= 0)) + TableScan [TS_13] (rows=20 width=88) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] <-Reducer 3 [SIMPLE_EDGE] llap SHUFFLE [RS_29] PartitionCols:_col0 diff --git ql/src/test/results/clientpositive/llap/explainuser_2.q.out ql/src/test/results/clientpositive/llap/explainuser_2.q.out index 0219af8833..9bc2fac74e 100644 --- ql/src/test/results/clientpositive/llap/explainuser_2.q.out +++ ql/src/test/results/clientpositive/llap/explainuser_2.q.out @@ -335,126 +335,124 @@ Stage-0 limit:100 Stage-1 Reducer 5 vectorized, llap - File Output Operator [FS_217] - Limit [LIM_216] (rows=2 width=285) + File Output Operator [FS_216] + Limit [LIM_215] (rows=2 width=285) Number of rows:100 - Select Operator [SEL_215] (rows=2 width=285) + Select Operator [SEL_214] (rows=2 width=285) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 4 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_214] - Group By Operator [GBY_213] (rows=2 width=285) + SHUFFLE [RS_213] + Group By Operator [GBY_212] (rows=2 width=285) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 3 [SIMPLE_EDGE] llap SHUFFLE [RS_49] PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_48] (rows=2 width=285) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(_col8)","count(_col15)","count(_col3)"],keys:_col7, _col14, _col2 - Top N Key Operator [TNK_91] (rows=28 width=534) - keys:_col7, _col14, _col2,top n:100 - Merge Join Operator [MERGEJOIN_188] (rows=28 width=534) - Conds:RS_44._col1, _col3=RS_45._col10, _col12(Inner),Output:["_col2","_col3","_col7","_col8","_col14","_col15"] - <-Reducer 10 [SIMPLE_EDGE] llap - SHUFFLE [RS_45] 
- PartitionCols:_col10, _col12 - Select Operator [SEL_40] (rows=2 width=447) - Output:["_col2","_col3","_col9","_col10","_col12"] - Merge Join Operator [MERGEJOIN_187] (rows=2 width=447) - Conds:RS_37._col2, _col4=RS_38._col1, _col3(Inner),Output:["_col0","_col1","_col9","_col10","_col12"] - <-Reducer 11 [SIMPLE_EDGE] llap - SHUFFLE [RS_38] - PartitionCols:_col1, _col3 - Merge Join Operator [MERGEJOIN_186] (rows=5 width=356) - Conds:RS_212._col0=RS_200._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Map 6 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_200] - PartitionCols:_col0 - Select Operator [SEL_197] (rows=5 width=87) - Output:["_col0"] - Filter Operator [FIL_194] (rows=5 width=178) - predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) - TableScan [TS_3] (rows=500 width=178) - default@src,d3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 15 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_212] - PartitionCols:_col0 - Select Operator [SEL_211] (rows=7 width=443) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_210] (rows=7 width=534) - predicate:((v1 = 'srv1') and k2 is not null and k3 is not null and v2 is not null and v3 is not null and k1 is not null) - TableScan [TS_18] (rows=85 width=534) - default@sr,sr,Tbl:COMPLETE,Col:COMPLETE,Output:["k1","v1","k2","v2","k3","v3"] - <-Reducer 9 [SIMPLE_EDGE] llap - SHUFFLE [RS_37] - PartitionCols:_col2, _col4 - Merge Join Operator [MERGEJOIN_185] (rows=2 width=352) - Conds:RS_34._col1=RS_209._col0(Inner),Output:["_col0","_col1","_col2","_col4"] - <-Map 14 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_209] - PartitionCols:_col0 - Select Operator [SEL_208] (rows=2 width=89) - Output:["_col0"] - Filter Operator [FIL_207] (rows=2 width=175) - predicate:((key = 'src1key') and value is not null) - TableScan [TS_15] (rows=25 width=175) - default@src1,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Reducer 8 [SIMPLE_EDGE] llap - SHUFFLE [RS_34] - 
PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_184] (rows=2 width=352) - Conds:RS_31._col3=RS_206._col0(Inner),Output:["_col0","_col1","_col2","_col4"] - <-Map 13 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_206] - PartitionCols:_col0 - Select Operator [SEL_205] (rows=6 width=91) - Output:["_col0"] - Filter Operator [FIL_204] (rows=6 width=178) - predicate:((key = 'srcpartkey') and value is not null) - TableScan [TS_12] (rows=2000 width=178) - default@srcpart,srcpart,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Reducer 7 [SIMPLE_EDGE] llap - SHUFFLE [RS_31] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_183] (rows=2 width=443) - Conds:RS_203._col0=RS_199._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] - <-Map 6 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_199] - PartitionCols:_col0 - Select Operator [SEL_196] (rows=2 width=87) - Output:["_col0"] - Filter Operator [FIL_193] (rows=2 width=178) - predicate:((value = 'd1value') and key is not null) - Please refer to the previous TableScan [TS_3] - <-Map 12 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_203] - PartitionCols:_col0 - Select Operator [SEL_202] (rows=7 width=443) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_201] (rows=7 width=534) - predicate:((v3 = 'ssv3') and k2 is not null and k3 is not null and k1 is not null and v1 is not null and v2 is not null) - TableScan [TS_6] (rows=85 width=534) - default@ss_n1,ss_n1,Tbl:COMPLETE,Col:COMPLETE,Output:["k1","v1","k2","v2","k3","v3"] - <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_44] - PartitionCols:_col1, _col3 - Merge Join Operator [MERGEJOIN_182] (rows=70 width=269) - Conds:RS_191._col0=RS_198._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 6 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_198] - PartitionCols:_col0 - Select Operator [SEL_195] (rows=5 width=87) - Output:["_col0"] - Filter Operator [FIL_192] (rows=5 width=178) - predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not 
null) - Please refer to the previous TableScan [TS_3] - <-Map 1 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_191] - PartitionCols:_col0 - Select Operator [SEL_190] (rows=170 width=356) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_189] (rows=170 width=356) - predicate:(v2 is not null and v3 is not null and k1 is not null) - TableScan [TS_0] (rows=170 width=356) - default@cs,cs,Tbl:COMPLETE,Col:COMPLETE,Output:["k1","v2","k3","v3"] + Merge Join Operator [MERGEJOIN_187] (rows=28 width=534) + Conds:RS_44._col1, _col3=RS_45._col10, _col12(Inner),Output:["_col2","_col3","_col7","_col8","_col14","_col15"] + <-Reducer 10 [SIMPLE_EDGE] llap + SHUFFLE [RS_45] + PartitionCols:_col10, _col12 + Select Operator [SEL_40] (rows=2 width=447) + Output:["_col2","_col3","_col9","_col10","_col12"] + Merge Join Operator [MERGEJOIN_186] (rows=2 width=447) + Conds:RS_37._col2, _col4=RS_38._col1, _col3(Inner),Output:["_col0","_col1","_col9","_col10","_col12"] + <-Reducer 11 [SIMPLE_EDGE] llap + SHUFFLE [RS_38] + PartitionCols:_col1, _col3 + Merge Join Operator [MERGEJOIN_185] (rows=5 width=356) + Conds:RS_211._col0=RS_199._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 6 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_199] + PartitionCols:_col0 + Select Operator [SEL_196] (rows=5 width=87) + Output:["_col0"] + Filter Operator [FIL_193] (rows=5 width=178) + predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) + TableScan [TS_3] (rows=500 width=178) + default@src,d3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Map 15 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_211] + PartitionCols:_col0 + Select Operator [SEL_210] (rows=7 width=443) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_209] (rows=7 width=534) + predicate:((v1 = 'srv1') and k2 is not null and k3 is not null and v2 is not null and v3 is not null and k1 is not null) + TableScan [TS_18] (rows=85 width=534) + 
default@sr,sr,Tbl:COMPLETE,Col:COMPLETE,Output:["k1","v1","k2","v2","k3","v3"] + <-Reducer 9 [SIMPLE_EDGE] llap + SHUFFLE [RS_37] + PartitionCols:_col2, _col4 + Merge Join Operator [MERGEJOIN_184] (rows=2 width=352) + Conds:RS_34._col1=RS_208._col0(Inner),Output:["_col0","_col1","_col2","_col4"] + <-Map 14 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_208] + PartitionCols:_col0 + Select Operator [SEL_207] (rows=2 width=89) + Output:["_col0"] + Filter Operator [FIL_206] (rows=2 width=175) + predicate:((key = 'src1key') and value is not null) + TableScan [TS_15] (rows=25 width=175) + default@src1,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 8 [SIMPLE_EDGE] llap + SHUFFLE [RS_34] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_183] (rows=2 width=352) + Conds:RS_31._col3=RS_205._col0(Inner),Output:["_col0","_col1","_col2","_col4"] + <-Map 13 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_205] + PartitionCols:_col0 + Select Operator [SEL_204] (rows=6 width=91) + Output:["_col0"] + Filter Operator [FIL_203] (rows=6 width=178) + predicate:((key = 'srcpartkey') and value is not null) + TableScan [TS_12] (rows=2000 width=178) + default@srcpart,srcpart,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 7 [SIMPLE_EDGE] llap + SHUFFLE [RS_31] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_182] (rows=2 width=443) + Conds:RS_202._col0=RS_198._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] + <-Map 6 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_198] + PartitionCols:_col0 + Select Operator [SEL_195] (rows=2 width=87) + Output:["_col0"] + Filter Operator [FIL_192] (rows=2 width=178) + predicate:((value = 'd1value') and key is not null) + Please refer to the previous TableScan [TS_3] + <-Map 12 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_202] + PartitionCols:_col0 + Select Operator [SEL_201] (rows=7 width=443) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_200] (rows=7 width=534) + predicate:((v3 = 
'ssv3') and k2 is not null and k3 is not null and k1 is not null and v1 is not null and v2 is not null) + TableScan [TS_6] (rows=85 width=534) + default@ss_n1,ss_n1,Tbl:COMPLETE,Col:COMPLETE,Output:["k1","v1","k2","v2","k3","v3"] + <-Reducer 2 [SIMPLE_EDGE] llap + SHUFFLE [RS_44] + PartitionCols:_col1, _col3 + Merge Join Operator [MERGEJOIN_181] (rows=70 width=269) + Conds:RS_190._col0=RS_197._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 6 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_197] + PartitionCols:_col0 + Select Operator [SEL_194] (rows=5 width=87) + Output:["_col0"] + Filter Operator [FIL_191] (rows=5 width=178) + predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) + Please refer to the previous TableScan [TS_3] + <-Map 1 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_190] + PartitionCols:_col0 + Select Operator [SEL_189] (rows=170 width=356) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_188] (rows=170 width=356) + predicate:(v2 is not null and v3 is not null and k1 is not null) + TableScan [TS_0] (rows=170 width=356) + default@cs,cs,Tbl:COMPLETE,Col:COMPLETE,Output:["k1","v2","k3","v3"] PREHOOK: query: explain SELECT x.key, z.value, y.value @@ -1069,107 +1067,105 @@ Stage-0 limit:100 Stage-1 Reducer 5 vectorized, llap - File Output Operator [FS_234] - Limit [LIM_233] (rows=100 width=10) + File Output Operator [FS_232] + Limit [LIM_231] (rows=100 width=10) Number of rows:100 - Select Operator [SEL_232] (rows=805 width=10) + Select Operator [SEL_230] (rows=805 width=10) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 4 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_231] - Group By Operator [GBY_230] (rows=805 width=10) + SHUFFLE [RS_229] + Group By Operator [GBY_228] (rows=805 width=10) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Map 3 [SIMPLE_EDGE] vectorized, llap 
- SHUFFLE [RS_229] + SHUFFLE [RS_227] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_228] (rows=1610 width=10) + Group By Operator [GBY_226] (rows=1610 width=10) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(_col9)","count(_col16)","count(_col3)"],keys:_col8, _col15, _col2 - Top N Key Operator [TNK_227] (rows=1610 width=10) - keys:_col8, _col15, _col2,top n:100 - Map Join Operator [MAPJOIN_226] (rows=1610 width=10) - Conds:RS_202._col1, _col3=SEL_225._col11, _col13(Inner),Output:["_col2","_col3","_col8","_col9","_col15","_col16"] - <-Map 2 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_202] - PartitionCols:_col1, _col3 - Map Join Operator [MAPJOIN_201] (rows=550 width=10) - Conds:RS_198._col0=SEL_200._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 1 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_198] + Map Join Operator [MAPJOIN_225] (rows=1610 width=10) + Conds:RS_201._col1, _col3=SEL_224._col11, _col13(Inner),Output:["_col2","_col3","_col8","_col9","_col15","_col16"] + <-Map 2 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_201] + PartitionCols:_col1, _col3 + Map Join Operator [MAPJOIN_200] (rows=550 width=10) + Conds:RS_197._col0=SEL_199._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 1 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_197] + PartitionCols:_col0 + Select Operator [SEL_196] (rows=170 width=34) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_195] (rows=170 width=34) + predicate:(v2 is not null and v3 is not null and k1 is not null) + TableScan [TS_0] (rows=170 width=34) + default@cs,cs,Tbl:COMPLETE,Col:NONE,Output:["k1","v2","k3","v3"] + <-Select Operator [SEL_199] (rows=500 width=10) + Output:["_col0"] + Filter Operator [FIL_198] (rows=500 width=10) + predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) + TableScan [TS_3] (rows=500 width=10) + default@src,d3,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Select Operator [SEL_224] (rows=1464 
width=10) + Output:["_col3","_col4","_col10","_col11","_col13"] + Map Join Operator [MAPJOIN_223] (rows=1464 width=10) + Conds:MAPJOIN_222._col3, _col5=RS_217._col2, _col4(Inner),Output:["_col1","_col2","_col10","_col11","_col13"] + <-Map 9 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_217] + PartitionCols:_col2, _col4 + Map Join Operator [MAPJOIN_216] (rows=550 width=10) + Conds:SEL_215._col0=RS_213._col0(Inner),Output:["_col2","_col3","_col4","_col5"] + <-Map 10 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_213] + PartitionCols:_col0 + Select Operator [SEL_212] (rows=42 width=34) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_211] (rows=42 width=34) + predicate:((v1 = 'srv1') and k2 is not null and k3 is not null and v2 is not null and v3 is not null and k1 is not null) + TableScan [TS_21] (rows=85 width=34) + default@sr,sr,Tbl:COMPLETE,Col:NONE,Output:["k1","v1","k2","v2","k3","v3"] + <-Select Operator [SEL_215] (rows=500 width=10) + Output:["_col0"] + Filter Operator [FIL_214] (rows=500 width=10) + predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) + TableScan [TS_18] (rows=500 width=10) + default@src,d2,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Map Join Operator [MAPJOIN_222] (rows=1331 width=10) + Conds:MAPJOIN_221._col2=RS_210._col0(Inner),Output:["_col1","_col2","_col3","_col5"] + <-Map 8 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_210] PartitionCols:_col0 - Select Operator [SEL_197] (rows=170 width=34) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_196] (rows=170 width=34) - predicate:(v2 is not null and v3 is not null and k1 is not null) - TableScan [TS_0] (rows=170 width=34) - default@cs,cs,Tbl:COMPLETE,Col:NONE,Output:["k1","v2","k3","v3"] - <-Select Operator [SEL_200] (rows=500 width=10) - Output:["_col0"] - Filter Operator [FIL_199] (rows=500 width=10) - predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) - TableScan [TS_3] (rows=500 
width=10) - default@src,d3,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_225] (rows=1464 width=10) - Output:["_col3","_col4","_col10","_col11","_col13"] - Map Join Operator [MAPJOIN_224] (rows=1464 width=10) - Conds:MAPJOIN_223._col3, _col5=RS_218._col2, _col4(Inner),Output:["_col1","_col2","_col10","_col11","_col13"] - <-Map 9 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_218] - PartitionCols:_col2, _col4 - Map Join Operator [MAPJOIN_217] (rows=550 width=10) - Conds:SEL_216._col0=RS_214._col0(Inner),Output:["_col2","_col3","_col4","_col5"] - <-Map 10 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_214] - PartitionCols:_col0 - Select Operator [SEL_213] (rows=42 width=34) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_212] (rows=42 width=34) - predicate:((v1 = 'srv1') and k2 is not null and k3 is not null and v2 is not null and v3 is not null and k1 is not null) - TableScan [TS_21] (rows=85 width=34) - default@sr,sr,Tbl:COMPLETE,Col:NONE,Output:["k1","v1","k2","v2","k3","v3"] - <-Select Operator [SEL_216] (rows=500 width=10) - Output:["_col0"] - Filter Operator [FIL_215] (rows=500 width=10) - predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) - TableScan [TS_18] (rows=500 width=10) - default@src,d2,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map Join Operator [MAPJOIN_223] (rows=1331 width=10) - Conds:MAPJOIN_222._col2=RS_211._col0(Inner),Output:["_col1","_col2","_col3","_col5"] - <-Map 8 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_211] + Select Operator [SEL_209] (rows=12 width=7) + Output:["_col0"] + Filter Operator [FIL_208] (rows=12 width=7) + predicate:((key = 'src1key') and value is not null) + TableScan [TS_15] (rows=25 width=7) + default@src1,src1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Map Join Operator [MAPJOIN_221] (rows=1210 width=10) + Conds:MAPJOIN_220._col1=RS_207._col0(Inner),Output:["_col1","_col2","_col3","_col5"] + <-Map 7 [BROADCAST_EDGE] 
vectorized, llap + BROADCAST [RS_207] PartitionCols:_col0 - Select Operator [SEL_210] (rows=12 width=7) + Select Operator [SEL_206] (rows=250 width=10) Output:["_col0"] - Filter Operator [FIL_209] (rows=12 width=7) - predicate:((key = 'src1key') and value is not null) - TableScan [TS_15] (rows=25 width=7) - default@src1,src1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map Join Operator [MAPJOIN_222] (rows=1210 width=10) - Conds:MAPJOIN_221._col1=RS_208._col0(Inner),Output:["_col1","_col2","_col3","_col5"] - <-Map 7 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_208] - PartitionCols:_col0 - Select Operator [SEL_207] (rows=250 width=10) - Output:["_col0"] - Filter Operator [FIL_206] (rows=250 width=10) - predicate:((value = 'd1value') and key is not null) - TableScan [TS_12] (rows=500 width=10) - default@src,d1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Map Join Operator [MAPJOIN_221] (rows=1100 width=10) - Conds:SEL_220._col0=RS_205._col3(Inner),Output:["_col1","_col2","_col3","_col5"] - <-Map 6 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_205] - PartitionCols:_col3 - Select Operator [SEL_204] (rows=42 width=34) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_203] (rows=42 width=34) - predicate:((v3 = 'ssv3') and k2 is not null and k3 is not null and k1 is not null and v1 is not null and v2 is not null) - TableScan [TS_9] (rows=85 width=34) - default@ss_n1,ss_n1,Tbl:COMPLETE,Col:NONE,Output:["k1","v1","k2","v2","k3","v3"] - <-Select Operator [SEL_220] (rows=1000 width=10) - Output:["_col0"] - Filter Operator [FIL_219] (rows=1000 width=10) - predicate:((key = 'srcpartkey') and value is not null) - TableScan [TS_6] (rows=2000 width=10) - default@srcpart,srcpart,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + Filter Operator [FIL_205] (rows=250 width=10) + predicate:((value = 'd1value') and key is not null) + TableScan [TS_12] (rows=500 width=10) + default@src,d1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Map Join 
Operator [MAPJOIN_220] (rows=1100 width=10) + Conds:SEL_219._col0=RS_204._col3(Inner),Output:["_col1","_col2","_col3","_col5"] + <-Map 6 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_204] + PartitionCols:_col3 + Select Operator [SEL_203] (rows=42 width=34) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_202] (rows=42 width=34) + predicate:((v3 = 'ssv3') and k2 is not null and k3 is not null and k1 is not null and v1 is not null and v2 is not null) + TableScan [TS_9] (rows=85 width=34) + default@ss_n1,ss_n1,Tbl:COMPLETE,Col:NONE,Output:["k1","v1","k2","v2","k3","v3"] + <-Select Operator [SEL_219] (rows=1000 width=10) + Output:["_col0"] + Filter Operator [FIL_218] (rows=1000 width=10) + predicate:((key = 'srcpartkey') and value is not null) + TableScan [TS_6] (rows=2000 width=10) + default@srcpart,srcpart,Tbl:COMPLETE,Col:NONE,Output:["key","value"] PREHOOK: query: explain SELECT x.key, z.value, y.value diff --git ql/src/test/results/clientpositive/llap/external_jdbc_table_perf.q.out ql/src/test/results/clientpositive/llap/external_jdbc_table_perf.q.out index 545cce75a9..d11c3d7ea9 100644 --- ql/src/test/results/clientpositive/llap/external_jdbc_table_perf.q.out +++ ql/src/test/results/clientpositive/llap/external_jdbc_table_perf.q.out @@ -1920,27 +1920,21 @@ GROUP BY "t0"."cs_ship_customer_sk" expressions: _col32 (type: char(1)), _col33 (type: char(1)), _col34 (type: char(20)), _col35 (type: int), _col36 (type: char(10)) outputColumnNames: _col32, _col33, _col34, _col35, _col36 Statistics: Num rows: 1 Data size: 499 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: +++++ + Group By Operator + aggregations: count() keys: _col32 (type: char(1)), _col33 (type: char(1)), _col34 (type: char(20)), _col35 (type: int), _col36 (type: char(10)) - null sort order: zzzzz + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 499 Basic 
stats: COMPLETE Column stats: NONE - top n: 100 - Group By Operator - aggregations: count() - keys: _col32 (type: char(1)), _col33 (type: char(1)), _col34 (type: char(20)), _col35 (type: int), _col36 (type: char(10)) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Reduce Output Operator + key expressions: _col0 (type: char(1)), _col1 (type: char(1)), _col2 (type: char(20)), _col3 (type: int), _col4 (type: char(10)) + null sort order: zzzzz + sort order: +++++ + Map-reduce partition columns: _col0 (type: char(1)), _col1 (type: char(1)), _col2 (type: char(20)), _col3 (type: int), _col4 (type: char(10)) Statistics: Num rows: 1 Data size: 499 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: char(1)), _col1 (type: char(1)), _col2 (type: char(20)), _col3 (type: int), _col4 (type: char(10)) - null sort order: zzzzz - sort order: +++++ - Map-reduce partition columns: _col0 (type: char(1)), _col1 (type: char(1)), _col2 (type: char(20)), _col3 (type: int), _col4 (type: char(10)) - Statistics: Num rows: 1 Data size: 499 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col5 (type: bigint) + TopN Hash Memory Usage: 0.1 + value expressions: _col5 (type: bigint) Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: @@ -2410,27 +2404,21 @@ GROUP BY "t0"."cs_ship_customer_sk" expressions: _col32 (type: char(1)), _col33 (type: char(1)), _col34 (type: char(20)), _col35 (type: int), _col36 (type: char(10)) outputColumnNames: _col32, _col33, _col34, _col35, _col36 Statistics: Num rows: 1 Data size: 499 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: +++++ + Group By Operator + aggregations: count() keys: _col32 (type: char(1)), _col33 (type: char(1)), _col34 (type: char(20)), _col35 (type: int), _col36 (type: char(10)) - null sort order: zzzzz + minReductionHashAggr: 0.99 + mode: hash + 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 499 Basic stats: COMPLETE Column stats: NONE - top n: 100 - Group By Operator - aggregations: count() - keys: _col32 (type: char(1)), _col33 (type: char(1)), _col34 (type: char(20)), _col35 (type: int), _col36 (type: char(10)) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Reduce Output Operator + key expressions: _col0 (type: char(1)), _col1 (type: char(1)), _col2 (type: char(20)), _col3 (type: int), _col4 (type: char(10)) + null sort order: zzzzz + sort order: +++++ + Map-reduce partition columns: _col0 (type: char(1)), _col1 (type: char(1)), _col2 (type: char(20)), _col3 (type: int), _col4 (type: char(10)) Statistics: Num rows: 1 Data size: 499 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: char(1)), _col1 (type: char(1)), _col2 (type: char(20)), _col3 (type: int), _col4 (type: char(10)) - null sort order: zzzzz - sort order: +++++ - Map-reduce partition columns: _col0 (type: char(1)), _col1 (type: char(1)), _col2 (type: char(20)), _col3 (type: int), _col4 (type: char(10)) - Statistics: Num rows: 1 Data size: 499 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col5 (type: bigint) + TopN Hash Memory Usage: 0.1 + value expressions: _col5 (type: bigint) Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/llap/filter_union.q.out ql/src/test/results/clientpositive/llap/filter_union.q.out index 0df77762a0..b7b741f379 100644 --- ql/src/test/results/clientpositive/llap/filter_union.q.out +++ ql/src/test/results/clientpositive/llap/filter_union.q.out @@ -530,26 +530,20 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: key Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + + 
Group By Operator + aggregations: count(key) keys: key (type: string) - null sort order: a - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 0 - Group By Operator - aggregations: count(key) - keys: key (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) + value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 diff --git ql/src/test/results/clientpositive/llap/limit_pushdown.q.out ql/src/test/results/clientpositive/llap/limit_pushdown.q.out index 3fdd77d802..dfa9bdfd57 100644 --- ql/src/test/results/clientpositive/llap/limit_pushdown.q.out +++ ql/src/test/results/clientpositive/llap/limit_pushdown.q.out @@ -213,27 +213,21 @@ STAGE PLANS: expressions: value (type: string), (UDFToDouble(key) + 1.0D) (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + + Group By Operator + aggregations: sum(_col1) keys: _col0 (type: string) - null sort order: z - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 20 - Group By Operator - aggregations: sum(_col1) - keys: _col0 (type: string) - 
minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.3 - value expressions: _col1 (type: double) + TopN Hash Memory Usage: 0.3 + value expressions: _col1 (type: double) Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 @@ -321,27 +315,21 @@ STAGE PLANS: expressions: value (type: string), (UDFToDouble(key) + 1.0D) (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + + Group By Operator + aggregations: sum(_col1), count(_col1) keys: _col0 (type: string) - null sort order: z - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 20 - Group By Operator - aggregations: sum(_col1), count(_col1) - keys: _col0 (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1, _col2 + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE - 
Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.3 - value expressions: _col1 (type: double), _col2 (type: bigint) + TopN Hash Memory Usage: 0.3 + value expressions: _col1 (type: double), _col2 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 @@ -433,25 +421,19 @@ STAGE PLANS: expressions: cdouble (type: double) outputColumnNames: cdouble Statistics: Num rows: 12288 Data size: 73400 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + + Group By Operator keys: cdouble (type: double) - null sort order: z - Statistics: Num rows: 12288 Data size: 73400 Basic stats: COMPLETE Column stats: COMPLETE - top n: 20 - Group By Operator - keys: cdouble (type: double) - minReductionHashAggr: 0.55013025 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 5528 Data size: 21816 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: double) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 5528 Data size: 21816 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.3 + minReductionHashAggr: 0.55013025 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 5528 Data size: 33024 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: double) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 5528 Data size: 33024 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.3 Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -461,13 +443,13 @@ STAGE PLANS: keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0 
- Statistics: Num rows: 5528 Data size: 21816 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5528 Data size: 33024 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 - Statistics: Num rows: 20 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 20 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1009,48 +991,29 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: key Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + + Group By Operator + aggregations: count() keys: key (type: string) - null sort order: z - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 2 - Group By Operator - aggregations: count() - keys: key (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.3 - value expressions: _col1 
(type: bigint) - Top N Key Operator - sort order: + - keys: key (type: string) - null sort order: a - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 3 - Group By Operator - aggregations: count() - keys: key (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 + TopN Hash Memory Usage: 0.3 + value expressions: _col1 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.3 - value expressions: _col1 (type: bigint) + TopN Hash Memory Usage: 0.3 + value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 diff --git ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out index efa8c38d7c..e11490856a 100644 --- ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out +++ ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out @@ -214,27 +214,21 @@ STAGE PLANS: expressions: value (type: string), (UDFToDouble(key) + 1.0D) (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + + Group By Operator + aggregations: sum(_col1) keys: _col0 (type: string) - null sort order: z - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 20 - Group By Operator - aggregations: sum(_col1) - keys: _col0 (type: string) - minReductionHashAggr: 0.5 - mode: hash - 
outputColumnNames: _col0, _col1 + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.3 - value expressions: _col1 (type: double) + TopN Hash Memory Usage: 0.3 + value expressions: _col1 (type: double) Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 @@ -337,27 +331,21 @@ STAGE PLANS: expressions: value (type: string), (UDFToDouble(key) + 1.0D) (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + + Group By Operator + aggregations: sum(_col1), count(_col1) keys: _col0 (type: string) - null sort order: z - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 20 - Group By Operator - aggregations: sum(_col1), count(_col1) - keys: _col0 (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1, _col2 + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key 
expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.3 - value expressions: _col1 (type: double), _col2 (type: bigint) + TopN Hash Memory Usage: 0.3 + value expressions: _col1 (type: double), _col2 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 @@ -464,25 +452,19 @@ STAGE PLANS: expressions: cdouble (type: double) outputColumnNames: cdouble Statistics: Num rows: 12288 Data size: 73400 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + + Group By Operator keys: cdouble (type: double) - null sort order: z - Statistics: Num rows: 12288 Data size: 73400 Basic stats: COMPLETE Column stats: COMPLETE - top n: 20 - Group By Operator - keys: cdouble (type: double) - minReductionHashAggr: 0.55013025 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 5528 Data size: 21816 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: double) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 5528 Data size: 21816 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.3 + minReductionHashAggr: 0.55013025 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 5528 Data size: 33024 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: double) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 5528 Data size: 33024 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.3 Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -492,12 +474,12 @@ STAGE PLANS: keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 5528 
Data size: 21816 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5528 Data size: 33024 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) null sort order: z sort order: + - Statistics: Num rows: 5528 Data size: 21816 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5528 Data size: 33024 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 Reducer 3 Execution mode: vectorized, llap @@ -505,13 +487,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: double) outputColumnNames: _col0 - Statistics: Num rows: 5528 Data size: 21816 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5528 Data size: 33024 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 - Statistics: Num rows: 20 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 20 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/llap_decimal64_reader.q.out ql/src/test/results/clientpositive/llap/llap_decimal64_reader.q.out index ffe5f6fb22..cd0fd219e3 100644 --- ql/src/test/results/clientpositive/llap/llap_decimal64_reader.q.out +++ ql/src/test/results/clientpositive/llap/llap_decimal64_reader.q.out @@ -135,25 +135,19 @@ STAGE PLANS: Filter Operator predicate: (cdecimal1) IN (3.35, 4.46) (type: boolean) Statistics: Num rows: 12288 Data size: 2752512 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: ++ + Group By Operator keys: cdecimal1 (type: decimal(10,2)), cdecimal2 (type: 
decimal(38,5)) - null sort order: aa - Statistics: Num rows: 12288 Data size: 2752512 Basic stats: COMPLETE Column stats: COMPLETE - top n: 2 - Group By Operator - keys: cdecimal1 (type: decimal(10,2)), cdecimal2 (type: decimal(38,5)) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(10,2)), _col1 (type: decimal(38,5)) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: decimal(10,2)), _col1 (type: decimal(38,5)) Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: decimal(10,2)), _col1 (type: decimal(38,5)) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col0 (type: decimal(10,2)), _col1 (type: decimal(38,5)) - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -243,25 +237,19 @@ STAGE PLANS: Filter Operator predicate: (cdecimal1) IN (3.35, 4.46) (type: boolean) Statistics: Num rows: 12288 Data size: 2752512 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: ++ + Group By Operator keys: cdecimal1 (type: decimal(10,2)), cdecimal2 (type: decimal(38,5)) - null sort order: aa - Statistics: Num rows: 12288 Data size: 2752512 Basic stats: COMPLETE Column stats: COMPLETE - top n: 2 - Group By Operator - keys: cdecimal1 (type: decimal(10,2)), cdecimal2 (type: decimal(38,5)) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE 
Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: decimal(10,2)), _col1 (type: decimal(38,5)) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: decimal(10,2)), _col1 (type: decimal(38,5)) Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: decimal(10,2)), _col1 (type: decimal(38,5)) - null sort order: aa - sort order: ++ - Map-reduce partition columns: _col0 (type: decimal(10,2)), _col1 (type: decimal(38,5)) - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: diff --git ql/src/test/results/clientpositive/llap/offset_limit.q.out ql/src/test/results/clientpositive/llap/offset_limit.q.out index 23f2de46e5..36e9df4763 100644 --- ql/src/test/results/clientpositive/llap/offset_limit.q.out +++ ql/src/test/results/clientpositive/llap/offset_limit.q.out @@ -30,27 +30,21 @@ STAGE PLANS: expressions: key (type: string), substr(value, 5) (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + + Group By Operator + aggregations: sum(_col1) keys: _col0 (type: string) - null sort order: z - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 20 - Group By Operator - aggregations: sum(_col1) - keys: _col0 (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) 
Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: double) + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: double) Execution mode: llap LLAP IO: no inputs Reducer 2 diff --git ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out index 4ecb7bc46d..7ed66b7681 100644 --- ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out +++ ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out @@ -215,27 +215,21 @@ STAGE PLANS: expressions: value (type: string), (UDFToDouble(key) + 1.0D) (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + + Group By Operator + aggregations: sum(_col1) keys: _col0 (type: string) - null sort order: z - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 30 - Group By Operator - aggregations: sum(_col1) - keys: _col0 (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + 
- Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.3 - value expressions: _col1 (type: double) + TopN Hash Memory Usage: 0.3 + value expressions: _col1 (type: double) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -324,27 +318,21 @@ STAGE PLANS: expressions: value (type: string), (UDFToDouble(key) + 1.0D) (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + + Group By Operator + aggregations: sum(_col1), count(_col1) keys: _col0 (type: string) - null sort order: z - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 30 - Group By Operator - aggregations: sum(_col1), count(_col1) - keys: _col0 (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1, _col2 + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.3 - value expressions: _col1 (type: double), _col2 (type: bigint) + TopN Hash Memory Usage: 0.3 + value expressions: _col1 (type: double), _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -437,25 +425,19 @@ STAGE PLANS: expressions: cdouble (type: double) outputColumnNames: cdouble Statistics: Num 
rows: 12288 Data size: 73400 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + + Group By Operator keys: cdouble (type: double) - null sort order: z - Statistics: Num rows: 12288 Data size: 73400 Basic stats: COMPLETE Column stats: COMPLETE - top n: 30 - Group By Operator - keys: cdouble (type: double) - minReductionHashAggr: 0.55013025 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 5528 Data size: 21816 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: double) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 5528 Data size: 21816 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.3 + minReductionHashAggr: 0.55013025 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 5528 Data size: 33024 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: double) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 5528 Data size: 33024 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.3 Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -465,14 +447,14 @@ STAGE PLANS: keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 5528 Data size: 21816 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5528 Data size: 33024 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Offset of rows: 10 - Statistics: Num rows: 20 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 20 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE table: 
input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/orc_struct_type_vectorization.q.out ql/src/test/results/clientpositive/llap/orc_struct_type_vectorization.q.out index 0eac389eb7..e3ecbd2c70 100644 --- ql/src/test/results/clientpositive/llap/orc_struct_type_vectorization.q.out +++ ql/src/test/results/clientpositive/llap/orc_struct_type_vectorization.q.out @@ -246,43 +246,33 @@ STAGE PLANS: projectedOutputColumnNums: [5] selectExpressions: VectorUDFStructField(col 1:struct, col 0:int) -> 5:int Statistics: Num rows: 341 Data size: 76542 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: + + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 5:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 5:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] keys: _col0 (type: int) - null sort order: a + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 341 Data size: 76542 Basic stats: COMPLETE Column stats: NONE - top n: 10 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 5:int - native: true - Group By Operator - aggregations: sum(_col0) - Group By Vectorization: - aggregators: VectorUDAFSumLong(col 5:int) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 5:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0] - keys: _col0 (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + 
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 341 Data size: 76542 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 341 Data size: 76542 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: diff --git ql/src/test/results/clientpositive/llap/parquet_complex_types_vectorization.q.out ql/src/test/results/clientpositive/llap/parquet_complex_types_vectorization.q.out index 4362fb6f2e..05fe48bfce 100644 --- ql/src/test/results/clientpositive/llap/parquet_complex_types_vectorization.q.out +++ ql/src/test/results/clientpositive/llap/parquet_complex_types_vectorization.q.out @@ -222,43 +222,33 @@ STAGE PLANS: projectedOutputColumnNums: [7, 8] selectExpressions: ListIndexColScalar(col 2:array, col 1:int) -> 7:int, ListIndexColScalar(col 2:array, col 0:int) -> 8:int Statistics: Num rows: 341 Data size: 38920 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: + + Group By Operator + aggregations: sum(_col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 8:int) -> bigint + className: VectorGroupByOperator + 
groupByMode: HASH + keyExpressions: col 7:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] keys: _col0 (type: int) - null sort order: z + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 341 Data size: 38920 Basic stats: COMPLETE Column stats: NONE - top n: 10 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 7:int - native: true - Group By Operator - aggregations: sum(_col1) - Group By Vectorization: - aggregators: VectorUDAFSumLong(col 8:int) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 7:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0] - keys: _col0 (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 341 Data size: 38920 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 341 Data size: 38920 Basic stats: COMPLETE Column stats: NONE - TopN Hash 
Memory Usage: 0.1 - value expressions: _col1 (type: bigint) + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs (cache only) Map Vectorization: @@ -532,43 +522,33 @@ STAGE PLANS: projectedOutputColumnNums: [7, 8] selectExpressions: ListIndexColScalar(col 2:array, col 1:int) -> 7:int, ListIndexColScalar(col 2:array, col 0:int) -> 8:int Statistics: Num rows: 341 Data size: 38921 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: + + Group By Operator + aggregations: sum(_col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 8:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 7:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] keys: _col0 (type: int) - null sort order: z + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 341 Data size: 38921 Basic stats: COMPLETE Column stats: NONE - top n: 10 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 7:int - native: true - Group By Operator - aggregations: sum(_col1) - Group By Vectorization: - aggregators: VectorUDAFSumLong(col 8:int) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 7:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0] - keys: _col0 (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true Statistics: Num rows: 341 Data size: 38921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 341 Data size: 38921 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs (cache only) Map Vectorization: @@ -842,43 +822,33 @@ STAGE PLANS: projectedOutputColumnNums: [7, 8] selectExpressions: ListIndexColScalar(col 2:array, col 1:int) -> 7:int, ListIndexColScalar(col 2:array, col 0:int) -> 8:int Statistics: Num rows: 341 Data size: 38923 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: + + Group By Operator + aggregations: sum(_col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 8:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 7:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] keys: _col0 (type: int) - null sort order: z + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 341 Data size: 38923 Basic stats: COMPLETE Column stats: NONE - top n: 10 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 7:int - native: true - Group By Operator - aggregations: sum(_col1) - Group By Vectorization: - aggregators: VectorUDAFSumLong(col 8:int) -> bigint - className: 
VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 7:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0] - keys: _col0 (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 341 Data size: 38923 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 341 Data size: 38923 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs (cache only) Map Vectorization: diff --git ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out index 24468c9a1b..876c743512 100644 --- ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out +++ ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out @@ 
-238,43 +238,33 @@ STAGE PLANS: projectedOutputColumnNums: [9, 10, 11] selectExpressions: VectorUDFMapIndexStringScalar(col 1:map, key: k1) -> 9:string, VectorUDFMapIndexLongScalar(col 2:map, key: 123) -> 10:int, VectorUDFMapIndexDecimalScalar(col 3:map, key: 123.123) -> 11:double Statistics: Num rows: 511 Data size: 995378 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: + + Group By Operator + aggregations: sum(_col1), sum(_col2) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 10:int) -> bigint, VectorUDAFSumDouble(col 11:double) -> double + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 9:string + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0, 1] keys: _col0 (type: string) - null sort order: z + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 511 Data size: 995378 Basic stats: COMPLETE Column stats: NONE - top n: 10 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 9:string - native: true - Group By Operator - aggregations: sum(_col1), sum(_col2) - Group By Vectorization: - aggregators: VectorUDAFSumLong(col 10:int) -> bigint, VectorUDAFSumDouble(col 11:double) -> double - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 9:string - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1] - keys: _col0 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 511 Data size: 995378 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 511 Data size: 995378 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint), _col2 (type: double) + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint), _col2 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs (cache only) Map Vectorization: diff --git ql/src/test/results/clientpositive/llap/parquet_struct_type_vectorization.q.out ql/src/test/results/clientpositive/llap/parquet_struct_type_vectorization.q.out index 45890a1890..ab73fa51df 100644 --- ql/src/test/results/clientpositive/llap/parquet_struct_type_vectorization.q.out +++ ql/src/test/results/clientpositive/llap/parquet_struct_type_vectorization.q.out @@ -246,43 +246,33 @@ STAGE PLANS: projectedOutputColumnNums: [5] selectExpressions: VectorUDFStructField(col 1:struct, col 0:int) -> 5:int Statistics: Num rows: 341 Data size: 76542 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: + + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 5:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 5:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] keys: _col0 (type: int) - null sort order: a + 
minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 341 Data size: 76542 Basic stats: COMPLETE Column stats: NONE - top n: 10 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 5:int - native: true - Group By Operator - aggregations: sum(_col0) - Group By Vectorization: - aggregators: VectorUDAFSumLong(col 5:int) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 5:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0] - keys: _col0 (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 341 Data size: 76542 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 341 Data size: 76542 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP 
IO: all inputs (cache only) Map Vectorization: diff --git ql/src/test/results/clientpositive/llap/semijoin_reddedup.q.out ql/src/test/results/clientpositive/llap/semijoin_reddedup.q.out index 0e9723b8f3..a9c886ea33 100644 --- ql/src/test/results/clientpositive/llap/semijoin_reddedup.q.out +++ ql/src/test/results/clientpositive/llap/semijoin_reddedup.q.out @@ -406,27 +406,21 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col8 Statistics: Num rows: 6269989391 Data size: 95303838902 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: -++++ + Group By Operator + aggregations: sum(_col8) keys: _col2 (type: double), _col3 (type: string), _col0 (type: bigint), _col4 (type: bigint), _col5 (type: string) - null sort order: zzaaa + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 6269989391 Data size: 95303838902 Basic stats: COMPLETE Column stats: NONE - top n: 100 - Group By Operator - aggregations: sum(_col8) - keys: _col2 (type: double), _col3 (type: string), _col0 (type: bigint), _col4 (type: bigint), _col5 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Reduce Output Operator + key expressions: _col0 (type: double), _col1 (type: string), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: string) + null sort order: zzaaa + sort order: -++++ + Map-reduce partition columns: _col0 (type: double), _col1 (type: string), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: string) Statistics: Num rows: 6269989391 Data size: 95303838902 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double), _col1 (type: string), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: string) - null sort order: zzaaa - sort order: -++++ - Map-reduce partition columns: _col0 (type: double), _col1 (type: string), _col2 
(type: bigint), _col3 (type: bigint), _col4 (type: string) - Statistics: Num rows: 6269989391 Data size: 95303838902 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col5 (type: double) + TopN Hash Memory Usage: 0.1 + value expressions: _col5 (type: double) Reducer 5 Execution mode: llap Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/llap/subquery_ALL.q.out ql/src/test/results/clientpositive/llap/subquery_ALL.q.out index d910c1a79d..c90045cabd 100644 --- ql/src/test/results/clientpositive/llap/subquery_ALL.q.out +++ ql/src/test/results/clientpositive/llap/subquery_ALL.q.out @@ -413,8 +413,8 @@ POSTHOOK: Input: default@part POSTHOOK: Input: default@part_null_n0 #### A masked pattern was here #### 26 -Warning: Shuffle Join MERGEJOIN[36][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product -Warning: Shuffle Join MERGEJOIN[37][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[35][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[36][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 4' is a cross product PREHOOK: query: select count(*) from part where (p_partkey <> ALL (select p_partkey from part_null_n0 where p_partkey is null)) is null PREHOOK: type: QUERY PREHOOK: Input: default@part diff --git ql/src/test/results/clientpositive/llap/subquery_ANY.q.out ql/src/test/results/clientpositive/llap/subquery_ANY.q.out index 91472d631e..8f023f371b 100644 --- ql/src/test/results/clientpositive/llap/subquery_ANY.q.out +++ ql/src/test/results/clientpositive/llap/subquery_ANY.q.out @@ -320,8 +320,8 @@ POSTHOOK: Input: default@part POSTHOOK: Input: default@part_null_n0 #### A masked pattern was here #### 26 -Warning: Shuffle Join MERGEJOIN[36][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product -Warning: Shuffle Join MERGEJOIN[37][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] 
in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[35][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[36][tables = [$hdt$_1, $hdt$_2, $hdt$_0]] in Stage 'Reducer 4' is a cross product PREHOOK: query: select count(*) from part where (p_partkey = ANY (select p_partkey from part_null_n0 where p_partkey is null)) is null PREHOOK: type: QUERY PREHOOK: Input: default@part diff --git ql/src/test/results/clientpositive/llap/topnkey.q.out ql/src/test/results/clientpositive/llap/topnkey.q.out index 1e77587f82..3150eefc7d 100644 --- ql/src/test/results/clientpositive/llap/topnkey.q.out +++ ql/src/test/results/clientpositive/llap/topnkey.q.out @@ -1,18 +1,13 @@ -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: EXPLAIN SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: EXPLAIN SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -OPTIMIZED SQL: SELECT `key` AS `$f0`, SUM(CAST(SUBSTR(`value`, 5) AS INTEGER)) AS `$f1` -FROM `default`.`src` -GROUP BY `key` -ORDER BY `key` -LIMIT 5 STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -31,17 +26,16 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Select Operator - expressions: key (type: string), UDFToInteger(substr(value, 5)) (type: int) - outputColumnNames: _col0, _col1 + Top N Key Operator + sort order: + + keys: key (type: string) + null sort order: z Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: _col0 (type: string) - null sort order: z + top n: 5 
+ Select Operator + expressions: key (type: string), UDFToInteger(substr(value, 5)) (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 5 Group By Operator aggregations: sum(_col1) keys: _col0 (type: string) @@ -55,69 +49,12 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - TopN: 5 TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) - auto parallelism: true Execution mode: llap LLAP IO: no inputs - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name 
default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [src] Reducer 2 Execution mode: llap - Needs Tagging: false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -130,14 +67,10 @@ STAGE PLANS: null sort order: z sort order: + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - TopN: 5 TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) - auto parallelism: false Reducer 3 Execution mode: llap - Needs Tagging: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) @@ -148,26 +81,11 @@ STAGE PLANS: Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types string:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -175,6 +93,19 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: SELECT key, 
SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0 +10 10 +100 200 +103 206 +104 208 PREHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -189,12 +120,12 @@ POSTHOOK: Input: default@src 103 206 104 208 PREHOOK: query: EXPLAIN -SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### POSTHOOK: query: EXPLAIN -SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### @@ -207,67 +138,108 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: src + alias: src1 Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a sort 
order: + - keys: key (type: string) - null sort order: z + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - top n: 5 - Group By Operator - keys: key (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: src2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: ++ + keys: _col0 (type: string), _col2 (type: string) + 
null sort order: za + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + top n: 5 + Group By Operator + keys: _col0 (type: string), _col2 (type: string) + minReductionHashAggr: 0.0 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: za + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 - Reducer 3 + value expressions: _col1 (type: string) + Reducer 4 Execution mode: llap Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 5 - Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: 
COMPLETE + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -279,33 +251,42 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -0 -10 -100 -103 -104 -PREHOOK: query: explain vectorization detail -SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: explain vectorization detail -SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: 
Input: default@src +#### A masked pattern was here #### +0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: EXPLAIN +SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key NULLS FIRST LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key NULLS FIRST LIMIT 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -PLAN VECTORIZATION: - enabled: false - enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -315,19 +296,22 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: src1 - filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) + Top N Key Operator + sort order: + + keys: key (type: string) + null sort order: a Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + top n: 5 Select Operator expressions: key (type: string) outputColumnNames: _col0 @@ -340,7 +324,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: src2 @@ -367,30 +351,53 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Inner Join 0 to 
1 + Left Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col2 Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1 + Top N Key Operator + sort order: ++ + keys: _col0 (type: string), _col2 (type: string) + null sort order: aa Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: string) + top n: 5 + Group By Operator + keys: _col0 (type: string), _col2 (type: string) + minReductionHashAggr: 0.0 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) + Reducer 4 Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: 
string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 5 Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE @@ -408,16 +415,276 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key NULLS FIRST LIMIT 5 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key NULLS FIRST LIMIT 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### 0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key NULLS FIRST LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key NULLS FIRST LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### 0 val_0 -0 val_0 -0 val_0 -0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: CREATE TABLE t_test( + a int, + b int, + c int +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default 
+PREHOOK: Output: default@t_test +POSTHOOK: query: CREATE TABLE t_test( + a int, + b int, + c int +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t_test +PREHOOK: query: INSERT INTO t_test VALUES +(5, 2, 3), +(6, 2, 1), +(7, 8, 4), (7, 8, 4), (7, 8, 4), +(5, 1, 2), (5, 1, 2), (5, 1, 2) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t_test +POSTHOOK: query: INSERT INTO t_test VALUES +(5, 2, 3), +(6, 2, 1), +(7, 8, 4), (7, 8, 4), (7, 8, 4), +(5, 1, 2), (5, 1, 2), (5, 1, 2) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t_test +POSTHOOK: Lineage: t_test.a SCRIPT [] +POSTHOOK: Lineage: t_test.b SCRIPT [] +POSTHOOK: Lineage: t_test.c SCRIPT [] +PREHOOK: query: EXPLAIN +SELECT a, b FROM t_test ORDER BY a, b LIMIT 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT a, b FROM t_test ORDER BY a, b LIMIT 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t_test + Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: ++ + keys: a (type: int), b (type: int) + null sort order: zz + Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + top n: 3 + Select Operator + expressions: a (type: int), b (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + null sort order: zz + sort 
order: ++ + Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 3 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b FROM t_test ORDER BY a, b LIMIT 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b FROM t_test ORDER BY a, b LIMIT 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +#### A masked pattern was here #### +5 1 +5 1 +5 1 +PREHOOK: query: SELECT a, b FROM t_test ORDER BY a, b LIMIT 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b FROM t_test ORDER BY a, b LIMIT 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +#### A masked pattern was here #### +5 1 +5 1 +5 1 +PREHOOK: query: EXPLAIN +SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +#### A masked pattern was here #### +STAGE 
DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t_test + Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: ++ + keys: a (type: int), b (type: int) + null sort order: zz + Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + top n: 3 + Select Operator + expressions: a (type: int), b (type: int) + outputColumnNames: a, b + Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: a (type: int), b (type: int) + minReductionHashAggr: 0.375 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + null sort order: zz + sort order: ++ + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), 
KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 3 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +#### A masked pattern was here #### +5 1 +5 2 +6 2 +PREHOOK: query: SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +#### A masked pattern was here #### +5 1 +5 2 +6 2 +PREHOOK: query: DROP TABLE t_test +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t_test +PREHOOK: Output: default@t_test +POSTHOOK: query: DROP TABLE t_test +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t_test +POSTHOOK: Output: default@t_test diff --git ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out index cc2dc47280..d2347fa8aa 100644 --- ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out +++ ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out @@ -145,43 +145,33 @@ STAGE PLANS: 
native: true projectedOutputColumnNums: [2] Statistics: Num rows: 1049 Data size: 4196 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: _col0 (type: int) - null sort order: z - Statistics: Num rows: 1049 Data size: 4196 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 - Top N Key Vectorization: - className: VectorTopNKeyOperator + Group By Operator + aggregations: sum(50), count(), sum(50.0D), count(50.0D), sum(50), count(50) + Group By Vectorization: + aggregators: VectorUDAFSumLong(ConstantVectorExpression(val 50) -> 12:int) -> bigint, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(ConstantVectorExpression(val 50.0) -> 13:double) -> double, VectorUDAFCount(ConstantVectorExpression(val 50.0) -> 14:double) -> bigint, VectorUDAFSumDecimal(ConstantVectorExpression(val 50) -> 15:decimal(10,0)) -> decimal(20,0), VectorUDAFCount(ConstantVectorExpression(val 50) -> 16:decimal(10,0)) -> bigint + className: VectorGroupByOperator + groupByMode: HASH keyExpressions: col 2:int - native: true - Group By Operator - aggregations: sum(50), count(), sum(50.0D), count(50.0D), sum(50), count(50) - Group By Vectorization: - aggregators: VectorUDAFSumLong(ConstantVectorExpression(val 50) -> 12:int) -> bigint, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(ConstantVectorExpression(val 50.0) -> 13:double) -> double, VectorUDAFCount(ConstantVectorExpression(val 50.0) -> 14:double) -> bigint, VectorUDAFSumDecimal(ConstantVectorExpression(val 50) -> 15:decimal(10,0)) -> decimal(20,0), VectorUDAFCount(ConstantVectorExpression(val 50) -> 16:decimal(10,0)) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 2:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] - keys: _col0 (type: int) - minReductionHashAggr: 0.75500476 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + native: false + vectorProcessingMode: HASH + 
projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] + keys: _col0 (type: int) + minReductionHashAggr: 0.75500476 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 257 Data size: 40092 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 257 Data size: 40092 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 257 Data size: 40092 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: double), _col4 (type: bigint), _col5 (type: decimal(12,0)), _col6 (type: bigint) + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: double), _col4 (type: bigint), _col5 (type: decimal(12,0)), _col6 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: diff --git ql/src/test/results/clientpositive/llap/vector_char_2.q.out 
ql/src/test/results/clientpositive/llap/vector_char_2.q.out index f7e76e5a8b..4cd3f4bdd4 100644 --- ql/src/test/results/clientpositive/llap/vector_char_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_char_2.q.out @@ -106,43 +106,33 @@ STAGE PLANS: projectedOutputColumnNums: [1, 3] selectExpressions: CastStringToLong(col 0:char(10)) -> 3:int Statistics: Num rows: 501 Data size: 89178 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: _col0 (type: char(20)) - null sort order: z - Statistics: Num rows: 501 Data size: 89178 Basic stats: COMPLETE Column stats: COMPLETE - top n: 5 - Top N Key Vectorization: - className: VectorTopNKeyOperator + Group By Operator + aggregations: sum(_col1), count() + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 3:int) -> bigint, VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + groupByMode: HASH keyExpressions: col 1:char(20) - native: true - Group By Operator - aggregations: sum(_col1), count() - Group By Vectorization: - aggregators: VectorUDAFSumLong(col 3:int) -> bigint, VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 1:char(20) - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1] - keys: _col0 (type: char(20)) - minReductionHashAggr: 0.49900198 - mode: hash - outputColumnNames: _col0, _col1, _col2 + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0, 1] + keys: _col0 (type: char(20)) + minReductionHashAggr: 0.500998 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: char(20)) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: char(20)) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: char(20)) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: char(20)) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint), _col2 (type: bigint) + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint), _col2 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -319,43 +309,33 @@ STAGE PLANS: projectedOutputColumnNums: [1, 3] selectExpressions: CastStringToLong(col 0:char(10)) -> 3:int Statistics: Num rows: 501 Data size: 89178 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: - - keys: _col0 (type: char(20)) - null sort order: z - Statistics: Num rows: 501 Data size: 89178 Basic stats: COMPLETE Column stats: COMPLETE - top n: 5 - Top N Key Vectorization: - className: VectorTopNKeyOperator + Group By Operator + aggregations: sum(_col1), count() + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 3:int) -> bigint, VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + groupByMode: HASH keyExpressions: col 1:char(20) - native: true - Group By Operator - aggregations: sum(_col1), count() - Group By Vectorization: - 
aggregators: VectorUDAFSumLong(col 3:int) -> bigint, VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 1:char(20) - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1] - keys: _col0 (type: char(20)) - minReductionHashAggr: 0.49900198 - mode: hash - outputColumnNames: _col0, _col1, _col2 + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0, 1] + keys: _col0 (type: char(20)) + minReductionHashAggr: 0.500998 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: char(20)) + null sort order: z + sort order: - + Map-reduce partition columns: _col0 (type: char(20)) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: char(20)) - null sort order: z - sort order: - - Map-reduce partition columns: _col0 (type: char(20)) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint), _col2 (type: bigint) + TopN Hash Memory Usage: 0.1 + value expressions: _col1 
(type: bigint), _col2 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out index 6fd15e7101..72fe7fcd2a 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out @@ -72,45 +72,35 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 1] Statistics: Num rows: 6 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: +++ - keys: a (type: string), b (type: string), 0L (type: bigint) - null sort order: zza - Statistics: Num rows: 6 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 - Top N Key Vectorization: - className: VectorTopNKeyOperator + Group By Operator + aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint - native: true - Group By Operator - aggregations: count() - Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0] - keys: a (type: string), b (type: string), 0L (type: bigint) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: a (type: string), b (type: string), 0L (type: bigint) + minReductionHashAggr: 0.0 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 12 Data size: 2232 Basic stats: COMPLETE 
Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + null sort order: zza + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:string, 1:string, 2:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 3:bigint Statistics: Num rows: 12 Data size: 2232 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - null sort order: zza - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - keyColumns: 0:string, 1:string, 2:bigint - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumns: 3:bigint - Statistics: Num rows: 12 Data size: 2232 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col3 (type: bigint) + TopN Hash Memory Usage: 0.1 + value expressions: _col3 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -127,7 +117,7 @@ STAGE PLANS: includeColumns: [0, 1] dataColumns: a:string, b:string, c:string partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, bigint] + scratchColumnTypeNames: [bigint] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: 
@@ -290,45 +280,35 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 1] Statistics: Num rows: 6 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: +++ - keys: a (type: string), b (type: string), 0L (type: bigint) - null sort order: zza - Statistics: Num rows: 6 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 - Top N Key Vectorization: - className: VectorTopNKeyOperator + Group By Operator + aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint - native: true - Group By Operator - aggregations: count() - Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0] - keys: a (type: string), b (type: string), 0L (type: bigint) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: a (type: string), b (type: string), 0L (type: bigint) + minReductionHashAggr: 0.0 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 12 Data size: 2232 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + null sort order: zza + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:string, 1:string, 2:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 3:bigint Statistics: Num rows: 12 Data size: 2232 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - null sort order: zza - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - keyColumns: 0:string, 1:string, 2:bigint - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumns: 3:bigint - Statistics: Num rows: 12 Data size: 2232 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col3 (type: bigint) + TopN Hash Memory Usage: 0.1 + value expressions: _col3 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -345,7 +325,7 @@ STAGE PLANS: includeColumns: [0, 1] dataColumns: a:string, b:string, c:string partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, bigint] + scratchColumnTypeNames: [bigint] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -508,45 +488,35 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 1] Statistics: Num rows: 6 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: +++ - keys: a (type: string), b (type: string), 0L (type: bigint) - null sort order: zza - Statistics: Num rows: 6 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 - Top N Key Vectorization: - className: VectorTopNKeyOperator + Group By Operator + 
aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint - native: true - Group By Operator - aggregations: count() - Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0] - keys: a (type: string), b (type: string), 0L (type: bigint) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: a (type: string), b (type: string), 0L (type: bigint) + minReductionHashAggr: 0.0 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 1116 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + null sort order: zza + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:string, 1:string, 2:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 3:bigint Statistics: Num rows: 6 Data size: 1116 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - null sort order: zza - sort order: +++ - Map-reduce partition columns: _col0 
(type: string), _col1 (type: string), _col2 (type: bigint) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - keyColumns: 0:string, 1:string, 2:bigint - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumns: 3:bigint - Statistics: Num rows: 6 Data size: 1116 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col3 (type: bigint) + TopN Hash Memory Usage: 0.1 + value expressions: _col3 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -563,7 +533,7 @@ STAGE PLANS: includeColumns: [0, 1] dataColumns: a:string, b:string, c:string partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, bigint] + scratchColumnTypeNames: [bigint] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -726,41 +696,31 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: ++++ - keys: a (type: string), b (type: string), c (type: string), 0L (type: bigint) - null sort order: zaaa - Statistics: Num rows: 6 Data size: 1530 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 - Top N Key Vectorization: - className: VectorTopNKeyOperator + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH keyExpressions: col 0:string, col 1:string, col 2:string, ConstantVectorExpression(val 0) -> 4:bigint - native: true - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:string, col 1:string, col 2:string, ConstantVectorExpression(val 0) -> 5:bigint - native: false - vectorProcessingMode: HASH - 
projectedOutputColumnNums: [] - keys: a (type: string), b (type: string), c (type: string), 0L (type: bigint) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: a (type: string), b (type: string), c (type: string), 0L (type: bigint) + minReductionHashAggr: 0.0 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 9 Data size: 2367 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: bigint) + null sort order: zaaa + sort order: ++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:string, 1:string, 2:string, 3:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 9 Data size: 2367 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: bigint) - null sort order: zaaa - sort order: ++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: bigint) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - keyColumns: 0:string, 1:string, 2:string, 3:bigint - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true 
- Statistics: Num rows: 9 Data size: 2367 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -777,7 +737,7 @@ STAGE PLANS: includeColumns: [0, 1, 2] dataColumns: a:string, b:string, c:string partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, bigint] + scratchColumnTypeNames: [bigint] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -936,41 +896,31 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0] Statistics: Num rows: 6 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: a (type: string) - null sort order: z - Statistics: Num rows: 6 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 - Top N Key Vectorization: - className: VectorTopNKeyOperator + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH keyExpressions: col 0:string - native: true - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:string - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: a (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0 + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: a (type: string) + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No 
PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 3 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - keyColumns: 0:string - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 3 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1133,45 +1083,35 @@ STAGE PLANS: projectedOutputColumnNums: [6] selectExpressions: DoubleColAddDoubleColumn(col 4:double, col 5:double)(children: CastStringToDouble(col 0:string) -> 4:double, CastStringToDouble(col 1:string) -> 5:double) -> 6:double Statistics: Num rows: 6 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: _col0 (type: double) - null sort order: z - Statistics: Num rows: 6 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE - top n: 10 - Top N Key Vectorization: - className: VectorTopNKeyOperator + Group By Operator + aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + groupByMode: HASH keyExpressions: col 6:double - native: true - Group By Operator - aggregations: count() - Group By Vectorization: - aggregators: VectorUDAFCountStar(*) -> bigint - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 6:double - native: false - vectorProcessingMode: HASH - 
projectedOutputColumnNums: [0] - keys: _col0 (type: double) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: _col0 (type: double) + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: double) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: double) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:double + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:bigint Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: double) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: double) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - keyColumns: 0:double - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumns: 1:bigint - Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: diff --git ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out 
ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out index d6325982e3..529c929013 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out @@ -270,40 +270,30 @@ STAGE PLANS: native: true projectedOutputColumnNums: [9] Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: ss_ticket_number (type: int) - null sort order: z - Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 20 - Top N Key Vectorization: - className: VectorTopNKeyOperator + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH keyExpressions: col 9:int - native: true - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 9:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: ss_ticket_number (type: int) - minReductionHashAggr: 0.915 - mode: hash - outputColumnNames: _col0 + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: ss_ticket_number (type: int) + minReductionHashAggr: 0.915 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 85 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 85 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator 
- key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 85 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: diff --git ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out index 4d417b9c3d..f6597bd5ff 100644 --- ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out +++ ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out @@ -380,27 +380,21 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col4 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Top N Key Operator - sort order: + + Group By Operator + aggregations: count() keys: _col4 (type: string) - null sort order: z + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - top n: 100 - Group By Operator - aggregations: count() - keys: _col4 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition 
columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: bigint) MergeJoin Vectorization: enabled: false enableConditionsNotMet: Vectorizing MergeJoin Supported IS false diff --git ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out index 97a211cfc6..a15c7732ae 100644 --- ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out +++ ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out @@ -64,43 +64,33 @@ STAGE PLANS: predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 2:decimal(20,10)), SelectColumnIsNotNull(col 3:decimal(23,14))) predicate: (cdecimal1 is not null and cdecimal2 is not null) (type: boolean) Statistics: Num rows: 6102 Data size: 1440072 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: ++++ - keys: cint (type: int), cdouble (type: double), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14)) - null sort order: zzzz - Statistics: Num rows: 6102 Data size: 1440072 Basic stats: COMPLETE Column stats: COMPLETE - top n: 50 - Top N Key Vectorization: - className: VectorTopNKeyOperator + Group By Operator + aggregations: min(cdecimal1) + Group By Vectorization: + aggregators: VectorUDAFMinDecimal(col 2:decimal(20,10)) -> decimal(20,10) + className: VectorGroupByOperator + groupByMode: HASH keyExpressions: col 0:int, col 1:double, col 2:decimal(20,10), col 3:decimal(23,14) - native: true - Group By Operator - aggregations: min(cdecimal1) - Group By Vectorization: - aggregators: VectorUDAFMinDecimal(col 2:decimal(20,10)) -> decimal(20,10) - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int, col 1:double, col 2:decimal(20,10), col 
3:decimal(23,14) - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [0] - keys: cint (type: int), cdouble (type: double), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14)) - minReductionHashAggr: 0.0 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: cint (type: int), cdouble (type: double), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14)) + minReductionHashAggr: 0.0 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 6102 Data size: 2123496 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: double), _col2 (type: decimal(20,10)), _col3 (type: decimal(23,14)) + null sort order: zzzz + sort order: ++++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: double), _col2 (type: decimal(20,10)), _col3 (type: decimal(23,14)) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6102 Data size: 2123496 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: double), _col2 (type: decimal(20,10)), _col3 (type: decimal(23,14)) - null sort order: zzzz - sort order: ++++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: double), _col2 (type: decimal(20,10)), _col3 (type: decimal(23,14)) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN 
IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 6102 Data size: 2123496 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col4 (type: decimal(20,10)) + TopN Hash Memory Usage: 0.1 + value expressions: _col4 (type: decimal(20,10)) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: diff --git ql/src/test/results/clientpositive/llap/vector_string_concat.q.out ql/src/test/results/clientpositive/llap/vector_string_concat.q.out index a8019be7aa..973ddaf43b 100644 --- ql/src/test/results/clientpositive/llap/vector_string_concat.q.out +++ ql/src/test/results/clientpositive/llap/vector_string_concat.q.out @@ -360,40 +360,30 @@ STAGE PLANS: projectedOutputColumnNums: [25] selectExpressions: StringGroupConcatColCol(col 22:string, col 24:string)(children: StringGroupColConcatStringScalar(col 21:string, val -)(children: StringScalarConcatStringGroupCol(val Quarter , col 20:string)(children: CastLongToString(col 19:int)(children: CastDoubleToLong(col 18:double)(children: DoubleColAddDoubleScalar(col 17:double, val 1.0)(children: DoubleColDivideDoubleScalar(col 16:double, val 3.0)(children: CastLongToDouble(col 15:int)(children: LongColSubtractLongScalar(col 14:int, val 1)(children: VectorUDFMonthDate(col 12, field MONTH) -> 14:int) -> 15:int) -> 16:double) -> 17:double) -> 18:double) -> 19:int) -> 20:string) -> 21:string) -> 22:string, CastLongToString(col 23:int)(children: VectorUDFYearDate(col 12, field YEAR) -> 23:int) -> 24:string) -> 25:string Statistics: Num rows: 2000 Data size: 106288 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: _col0 (type: string) - null sort order: z - Statistics: Num rows: 2000 Data size: 106288 Basic stats: COMPLETE Column stats: COMPLETE - top n: 50 - Top N Key Vectorization: - className: VectorTopNKeyOperator + Group By Operator + Group By 
Vectorization: + className: VectorGroupByOperator + groupByMode: HASH keyExpressions: col 25:string - native: true - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 25:string - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: _col0 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Reduce Sink Vectorization: - className: VectorReduceSinkStringOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: 
diff --git ql/src/test/results/clientpositive/llap/vector_topnkey.q.out ql/src/test/results/clientpositive/llap/vector_topnkey.q.out index c140bdfd37..9e382c051f 100644 --- ql/src/test/results/clientpositive/llap/vector_topnkey.q.out +++ ql/src/test/results/clientpositive/llap/vector_topnkey.q.out @@ -87,24 +87,24 @@ STAGE PLANS: TableScan Vectorization: native: true vectorizationSchemaColumns: [0:cint1:int, 1:cint2:int, 2:cdouble:double, 3:cvarchar:varchar(50), 4:cdecimal1:decimal(10,2)/DECIMAL_64, 5:cdecimal2:decimal(38,5), 6:ROW__ID:struct] - Select Operator - expressions: cint1 (type: int) - outputColumnNames: cint1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] + Top N Key Operator + sort order: + + keys: cint1 (type: int) + null sort order: z Statistics: Num rows: 14 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: cint1 (type: int) - null sort order: z - Statistics: Num rows: 14 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - top n: 3 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 0:int + top n: 3 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 0:int + native: true + Select Operator + expressions: cint1 (type: int) + outputColumnNames: cint1 + Select Vectorization: + className: VectorSelectOperator native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 14 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: className: VectorGroupByOperator diff --git ql/src/test/results/clientpositive/llap/vectorization_limit.q.out ql/src/test/results/clientpositive/llap/vectorization_limit.q.out index 7326adf522..680c1134ac 100644 --- ql/src/test/results/clientpositive/llap/vectorization_limit.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_limit.q.out @@ -510,42 +510,32 @@ STAGE PLANS: native: true 
projectedOutputColumnNums: [0] Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: ctinyint (type: tinyint) - null sort order: z - Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE - top n: 20 - Top N Key Vectorization: - className: VectorTopNKeyOperator + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH keyExpressions: col 0:tinyint - native: true - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:tinyint - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: ctinyint (type: tinyint) - minReductionHashAggr: 0.9893392 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 131 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: tinyint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: tinyint) - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumns: 0:tinyint - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumns: 0:tinyint - Statistics: Num rows: 131 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.3 + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: ctinyint (type: tinyint) + minReductionHashAggr: 0.9893392 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 131 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + null sort order: z + sort order: + + Map-reduce 
partition columns: _col0 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:tinyint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumns: 0:tinyint + Statistics: Num rows: 131 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.3 Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -590,19 +580,19 @@ STAGE PLANS: keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 131 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 131 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 20 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 20 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/perf/tez/cbo_query14.q.out ql/src/test/results/clientpositive/perf/tez/cbo_query14.q.out index e9308cd709..d1e8c3806e 100644 --- ql/src/test/results/clientpositive/perf/tez/cbo_query14.q.out +++ ql/src/test/results/clientpositive/perf/tez/cbo_query14.q.out @@ -1,6 +1,6 @@ -Warning: Shuffle Join MERGEJOIN[1173][tables = [$hdt$_0, $hdt$_1]] in 
Stage 'Reducer 6' is a cross product -Warning: Shuffle Join MERGEJOIN[1180][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 16' is a cross product -Warning: Shuffle Join MERGEJOIN[1187][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 22' is a cross product +Warning: Shuffle Join MERGEJOIN[1175][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[1182][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 16' is a cross product +Warning: Shuffle Join MERGEJOIN[1189][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 22' is a cross product PREHOOK: query: explain cbo with cross_items as (select i_item_sk ss_item_sk diff --git ql/src/test/results/clientpositive/perf/tez/cbo_query77.q.out ql/src/test/results/clientpositive/perf/tez/cbo_query77.q.out index 02caf99f7d..aa080603e1 100644 --- ql/src/test/results/clientpositive/perf/tez/cbo_query77.q.out +++ ql/src/test/results/clientpositive/perf/tez/cbo_query77.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[317][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 16' is a cross product +Warning: Shuffle Join MERGEJOIN[319][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 16' is a cross product PREHOOK: query: explain cbo with ss as (select s_store_sk, diff --git ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query14.q.out ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query14.q.out index 43e1b2b5c2..59fcf951fe 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query14.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query14.q.out @@ -1,6 +1,6 @@ -Warning: Shuffle Join MERGEJOIN[1191][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product -Warning: Shuffle Join MERGEJOIN[1198][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 16' is a cross product -Warning: Shuffle Join MERGEJOIN[1205][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 22' is a cross product +Warning: Shuffle Join MERGEJOIN[1193][tables = 
[$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[1200][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 16' is a cross product +Warning: Shuffle Join MERGEJOIN[1207][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 22' is a cross product PREHOOK: query: explain cbo with cross_items as (select i_item_sk ss_item_sk diff --git ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query77.q.out ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query77.q.out index 2f75361df1..39da7ea903 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query77.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query77.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[225][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 14' is a cross product +Warning: Shuffle Join MERGEJOIN[227][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 14' is a cross product PREHOOK: query: explain cbo with ss as (select s_store_sk, diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query10.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query10.q.out index bb3b1b6660..d245a60587 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query10.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query10.q.out @@ -157,120 +157,120 @@ Stage-0 limit:100 Stage-1 Reducer 8 vectorized - File Output Operator [FS_229] - Limit [LIM_228] (rows=1 width=419) + File Output Operator [FS_234] + Limit [LIM_233] (rows=1 width=419) Number of rows:100 - Select Operator [SEL_227] (rows=1 width=419) + Select Operator [SEL_232] (rows=1 width=419) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] <-Reducer 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_226] - Select Operator [SEL_225] (rows=1 width=419) + SHUFFLE [RS_231] + Select Operator [SEL_230] (rows=1 width=419) 
Output:["_col0","_col1","_col2","_col3","_col4","_col6","_col8","_col10","_col12"] - Group By Operator [GBY_224] (rows=1 width=379) + Group By Operator [GBY_229] (rows=1 width=379) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7 <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_66] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Group By Operator [GBY_65] (rows=3 width=379) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["count()"],keys:_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Top N Key Operator [TNK_102] (rows=1401496 width=379) - keys:_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13,top n:100 - Select Operator [SEL_64] (rows=1401496 width=379) - Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + Select Operator [SEL_64] (rows=1401496 width=379) + Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + Top N Key Operator [TNK_105] (rows=1401496 width=379) + keys:_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13,top n:100 Filter Operator [FIL_63] (rows=1401496 width=379) predicate:(_col14 is not null or _col16 is not null) - Merge Join Operator [MERGEJOIN_180] (rows=1401496 width=379) - Conds:RS_60._col0=RS_223._col1(Left Outer),Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col16"] + Merge Join Operator [MERGEJOIN_185] (rows=1401496 width=379) + Conds:RS_60._col0=RS_228._col1(Left Outer),Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col16"] <-Reducer 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_223] + SHUFFLE [RS_228] PartitionCols:_col1 - Select Operator [SEL_222] (rows=1401496 width=7) + Select Operator [SEL_227] (rows=1401496 width=7) Output:["_col0","_col1"] - Group By Operator 
[GBY_221] (rows=1401496 width=3) + Group By Operator [GBY_226] (rows=1401496 width=3) Output:["_col0"],keys:KEY._col0 <-Reducer 18 [SIMPLE_EDGE] SHUFFLE [RS_43] PartitionCols:_col0 Group By Operator [GBY_42] (rows=285115246 width=3) Output:["_col0"],keys:_col1 - Merge Join Operator [MERGEJOIN_177] (rows=285115246 width=3) - Conds:RS_220._col0=RS_195._col0(Inner),Output:["_col1"] + Merge Join Operator [MERGEJOIN_182] (rows=285115246 width=3) + Conds:RS_225._col0=RS_200._col0(Inner),Output:["_col1"] <-Map 13 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_195] + PARTITION_ONLY_SHUFFLE [RS_200] PartitionCols:_col0 - Select Operator [SEL_190] (rows=201 width=4) + Select Operator [SEL_195] (rows=201 width=4) Output:["_col0"] - Filter Operator [FIL_189] (rows=201 width=12) + Filter Operator [FIL_194] (rows=201 width=12) predicate:((d_year = 2002) and d_moy BETWEEN 4 AND 7) TableScan [TS_11] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] <-Map 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_220] + SHUFFLE [RS_225] PartitionCols:_col0 - Select Operator [SEL_219] (rows=285115246 width=7) + Select Operator [SEL_224] (rows=285115246 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_218] (rows=285115246 width=7) + Filter Operator [FIL_223] (rows=285115246 width=7) predicate:(cs_ship_customer_sk is not null and cs_sold_date_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_39_date_dim_d_date_sk_min) AND DynamicValue(RS_39_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_39_date_dim_d_date_sk_bloom_filter))) TableScan [TS_32] (rows=287989836 width=7) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_ship_customer_sk"] <-Reducer 20 [BROADCAST_EDGE] vectorized - BROADCAST [RS_217] - Group By Operator [GBY_216] (rows=1 width=12) + BROADCAST [RS_222] + Group By Operator [GBY_221] (rows=1 width=12) 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_202] - Group By Operator [GBY_199] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_207] + Group By Operator [GBY_204] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_196] (rows=201 width=4) + Select Operator [SEL_201] (rows=201 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_190] + Please refer to the previous Select Operator [SEL_195] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_60] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_179] (rows=1414922 width=379) - Conds:RS_57._col0=RS_215._col1(Left Outer),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] + Merge Join Operator [MERGEJOIN_184] (rows=1414922 width=379) + Conds:RS_57._col0=RS_220._col1(Left Outer),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] <-Reducer 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_215] + SHUFFLE [RS_220] PartitionCols:_col1 - Select Operator [SEL_214] (rows=1414922 width=7) + Select Operator [SEL_219] (rows=1414922 width=7) Output:["_col0","_col1"] - Group By Operator [GBY_213] (rows=1414922 width=3) + Group By Operator [GBY_218] (rows=1414922 width=3) Output:["_col0"],keys:KEY._col0 <-Reducer 15 [SIMPLE_EDGE] SHUFFLE [RS_29] PartitionCols:_col0 Group By Operator [GBY_28] (rows=143930993 width=3) Output:["_col0"],keys:_col1 - Merge Join Operator [MERGEJOIN_176] (rows=143930993 width=3) - Conds:RS_212._col0=RS_193._col0(Inner),Output:["_col1"] + Merge Join Operator [MERGEJOIN_181] (rows=143930993 width=3) + Conds:RS_217._col0=RS_198._col0(Inner),Output:["_col1"] <-Map 13 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_193] + PARTITION_ONLY_SHUFFLE 
[RS_198] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_190] + Please refer to the previous Select Operator [SEL_195] <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_212] + SHUFFLE [RS_217] PartitionCols:_col0 - Select Operator [SEL_211] (rows=143930993 width=7) + Select Operator [SEL_216] (rows=143930993 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_210] (rows=143930993 width=7) + Filter Operator [FIL_215] (rows=143930993 width=7) predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_25_date_dim_d_date_sk_min) AND DynamicValue(RS_25_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_25_date_dim_d_date_sk_bloom_filter))) TableScan [TS_18] (rows=144002668 width=7) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_209] - Group By Operator [GBY_208] (rows=1 width=12) + BROADCAST [RS_214] + Group By Operator [GBY_213] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_201] - Group By Operator [GBY_198] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_206] + Group By Operator [GBY_203] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_194] (rows=201 width=4) + Select Operator [SEL_199] (rows=201 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_190] + Please refer to the previous Select Operator [SEL_195] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_57] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_178] (rows=525327388 width=375) + Merge Join Operator [MERGEJOIN_183] (rows=525327388 width=375) Conds:RS_54._col0=RS_55._col0(Left 
Semi),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_55] @@ -279,64 +279,64 @@ Stage-0 Output:["_col0"],keys:_col0 Select Operator [SEL_17] (rows=525327388 width=3) Output:["_col0"] - Merge Join Operator [MERGEJOIN_175] (rows=525327388 width=3) - Conds:RS_207._col0=RS_191._col0(Inner),Output:["_col1"] + Merge Join Operator [MERGEJOIN_180] (rows=525327388 width=3) + Conds:RS_212._col0=RS_196._col0(Inner),Output:["_col1"] <-Map 13 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_191] + PARTITION_ONLY_SHUFFLE [RS_196] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_190] + Please refer to the previous Select Operator [SEL_195] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_207] + SHUFFLE [RS_212] PartitionCols:_col0 - Select Operator [SEL_206] (rows=525327388 width=7) + Select Operator [SEL_211] (rows=525327388 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_205] (rows=525327388 width=7) + Filter Operator [FIL_210] (rows=525327388 width=7) predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_15_date_dim_d_date_sk_min) AND DynamicValue(RS_15_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_15_date_dim_d_date_sk_bloom_filter))) TableScan [TS_8] (rows=575995635 width=7) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk"] <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_204] - Group By Operator [GBY_203] (rows=1 width=12) + BROADCAST [RS_209] + Group By Operator [GBY_208] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_200] - Group By Operator [GBY_197] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_205] + Group By Operator [GBY_202] 
(rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_192] (rows=201 width=4) + Select Operator [SEL_197] (rows=201 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_190] + Please refer to the previous Select Operator [SEL_195] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_54] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_174] (rows=228127 width=375) - Conds:RS_49._col1=RS_188._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + Merge Join Operator [MERGEJOIN_179] (rows=228127 width=375) + Conds:RS_49._col1=RS_193._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_188] + SHUFFLE [RS_193] PartitionCols:_col0 - Select Operator [SEL_187] (rows=1861800 width=375) + Select Operator [SEL_192] (rows=1861800 width=375) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] TableScan [TS_6] (rows=1861800 width=375) default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status","cd_purchase_estimate","cd_credit_rating","cd_dep_count","cd_dep_employed_count","cd_dep_college_count"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_49] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_173] (rows=224946 width=4) - Conds:RS_183._col2=RS_186._col0(Inner),Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_178] (rows=224946 width=4) + Conds:RS_188._col2=RS_191._col0(Inner),Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_183] + SHUFFLE [RS_188] PartitionCols:_col2 - Select Operator [SEL_182] (rows=77201384 width=11) + Select Operator [SEL_187] (rows=77201384 width=11) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_181] (rows=77201384 width=11) + Filter Operator 
[FIL_186] (rows=77201384 width=11) predicate:(c_current_cdemo_sk is not null and c_current_addr_sk is not null) TableScan [TS_0] (rows=80000000 width=11) default@customer,c,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"] <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_186] + SHUFFLE [RS_191] PartitionCols:_col0 - Select Operator [SEL_185] (rows=116550 width=102) + Select Operator [SEL_190] (rows=116550 width=102) Output:["_col0"] - Filter Operator [FIL_184] (rows=116550 width=102) + Filter Operator [FIL_189] (rows=116550 width=102) predicate:(ca_county) IN ('Walker County', 'Richland County', 'Gaines County', 'Douglas County', 'Dona Ana County') TableScan [TS_3] (rows=40000000 width=102) default@customer_address,ca,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county"] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query14.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query14.q.out index 228b20a8d7..65d3faa20f 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query14.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query14.q.out @@ -1,6 +1,6 @@ -Warning: Shuffle Join MERGEJOIN[1191][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product -Warning: Shuffle Join MERGEJOIN[1198][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 16' is a cross product -Warning: Shuffle Join MERGEJOIN[1205][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 22' is a cross product +Warning: Shuffle Join MERGEJOIN[1193][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[1200][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 16' is a cross product +Warning: Shuffle Join MERGEJOIN[1207][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 22' is a cross product PREHOOK: query: explain with cross_items as (select i_item_sk ss_item_sk @@ -296,36 +296,36 @@ Stage-0 limit:100 Stage-1 Reducer 9 vectorized - File Output Operator 
[FS_1361] - Limit [LIM_1360] (rows=100 width=223) + File Output Operator [FS_1363] + Limit [LIM_1362] (rows=100 width=223) Number of rows:100 - Select Operator [SEL_1359] (rows=304320 width=223) + Select Operator [SEL_1361] (rows=304320 width=223) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1358] - Select Operator [SEL_1357] (rows=304320 width=223) + SHUFFLE [RS_1360] + Select Operator [SEL_1359] (rows=304320 width=223) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Group By Operator [GBY_1356] (rows=304320 width=231) + Group By Operator [GBY_1358] (rows=304320 width=231) Output:["_col0","_col1","_col2","_col3","_col5","_col6"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 <-Union 7 [SIMPLE_EDGE] <-Reducer 16 [CONTAINS] - Reduce Output Operator [RS_1204] + Reduce Output Operator [RS_1206] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_1203] (rows=304320 width=231) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3, 0L - Top N Key Operator [TNK_1202] (rows=121728 width=221) - keys:_col0, _col1, _col2, _col3, 0L,top n:100 - Select Operator [SEL_1200] (rows=40576 width=223) + Top N Key Operator [TNK_1205] (rows=304320 width=231) + keys:_col0, _col1, _col2, _col3,top n:100 + Group By Operator [GBY_1204] (rows=304320 width=231) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3, 0L + Select Operator [SEL_1202] (rows=40576 width=223) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_1199] (rows=40576 width=244) + Filter Operator [FIL_1201] (rows=40576 width=244) predicate:(_col3 > _col5) - Merge Join Operator [MERGEJOIN_1198] (rows=121728 width=244) + Merge Join Operator [MERGEJOIN_1200] (rows=121728 
width=244) Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 15 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1373] - Filter Operator [FIL_1372] (rows=121728 width=132) + PARTITION_ONLY_SHUFFLE [RS_1375] + Filter Operator [FIL_1374] (rows=121728 width=132) predicate:_col3 is not null - Group By Operator [GBY_1371] (rows=121728 width=132) + Group By Operator [GBY_1373] (rows=121728 width=132) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_238] @@ -334,374 +334,374 @@ Stage-0 Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col3)","count()"],keys:_col0, _col1, _col2 Select Operator [SEL_235] (rows=286549727 width=127) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_1171] (rows=286549727 width=127) - Conds:RS_232._col1=RS_1339._col0(Inner),Output:["_col2","_col3","_col7","_col8","_col9"] + Merge Join Operator [MERGEJOIN_1173] (rows=286549727 width=127) + Conds:RS_232._col1=RS_1341._col0(Inner),Output:["_col2","_col3","_col7","_col8","_col9"] <-Map 65 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1339] + SHUFFLE [RS_1341] PartitionCols:_col0 - Select Operator [SEL_1330] (rows=462000 width=15) + Select Operator [SEL_1332] (rows=462000 width=15) Output:["_col0","_col1","_col2","_col3"] TableScan [TS_81] (rows=462000 width=15) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand_id","i_class_id","i_category_id"] <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_232] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1170] (rows=286549727 width=119) + Merge Join Operator [MERGEJOIN_1172] (rows=286549727 width=119) Conds:RS_229._col1=RS_230._col0(Inner),Output:["_col1","_col2","_col3"] <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_229] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1162] (rows=286549727 width=119) - 
Conds:RS_1366._col0=RS_1306._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_1164] (rows=286549727 width=119) + Conds:RS_1368._col0=RS_1308._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 10 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1306] + PARTITION_ONLY_SHUFFLE [RS_1308] PartitionCols:_col0 - Select Operator [SEL_1303] (rows=50 width=4) + Select Operator [SEL_1305] (rows=50 width=4) Output:["_col0"] - Filter Operator [FIL_1302] (rows=50 width=12) + Filter Operator [FIL_1304] (rows=50 width=12) predicate:((d_year = 2000) and (d_moy = 11)) TableScan [TS_3] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] <-Map 79 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1366] + SHUFFLE [RS_1368] PartitionCols:_col0 - Select Operator [SEL_1365] (rows=286549727 width=123) + Select Operator [SEL_1367] (rows=286549727 width=123) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1364] (rows=286549727 width=123) + Filter Operator [FIL_1366] (rows=286549727 width=123) predicate:(cs_sold_date_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_227_date_dim_d_date_sk_min) AND DynamicValue(RS_227_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_227_date_dim_d_date_sk_bloom_filter))) TableScan [TS_143] (rows=287989836 width=123) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk","cs_quantity","cs_list_price"] <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1363] - Group By Operator [GBY_1362] (rows=1 width=12) + BROADCAST [RS_1365] + Group By Operator [GBY_1364] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1314] - Group By Operator [GBY_1311] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_1316] + Group 
By Operator [GBY_1313] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1307] (rows=50 width=4) + Select Operator [SEL_1309] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1303] + Please refer to the previous Select Operator [SEL_1305] <-Reducer 34 [SIMPLE_EDGE] SHUFFLE [RS_230] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_1169] (rows=729 width=4) - Conds:RS_1347._col1, _col2, _col3=RS_1370._col0, _col1, _col2(Inner),Output:["_col0"] + Merge Join Operator [MERGEJOIN_1171] (rows=729 width=4) + Conds:RS_1349._col1, _col2, _col3=RS_1372._col0, _col1, _col2(Inner),Output:["_col0"] <-Map 65 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1347] + SHUFFLE [RS_1349] PartitionCols:_col1, _col2, _col3 - Select Operator [SEL_1340] (rows=458612 width=15) + Select Operator [SEL_1342] (rows=458612 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1331] (rows=458612 width=15) + Filter Operator [FIL_1333] (rows=458612 width=15) predicate:(i_category_id is not null and i_brand_id is not null and i_class_id is not null) Please refer to the previous TableScan [TS_81] <-Reducer 33 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1370] + SHUFFLE [RS_1372] PartitionCols:_col0, _col1, _col2 - Select Operator [SEL_1369] (rows=1 width=12) + Select Operator [SEL_1371] (rows=1 width=12) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1368] (rows=1 width=20) + Filter Operator [FIL_1370] (rows=1 width=20) predicate:(_col3 = 3L) - Group By Operator [GBY_1367] (rows=120960 width=20) + Group By Operator [GBY_1369] (rows=120960 width=20) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Union 32 [SIMPLE_EDGE] <-Reducer 31 [CONTAINS] vectorized - Reduce Output Operator [RS_1424] + Reduce Output Operator [RS_1426] PartitionCols:_col0, _col1, _col2 - Group By 
Operator [GBY_1423] (rows=120960 width=20) + Group By Operator [GBY_1425] (rows=120960 width=20) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1422] (rows=120960 width=20) + Group By Operator [GBY_1424] (rows=120960 width=20) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 26 [SIMPLE_EDGE] SHUFFLE [RS_169] PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_25] (rows=3144960 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col4, _col5, _col6 - Merge Join Operator [MERGEJOIN_1151] (rows=546042657 width=11) - Conds:RS_21._col1=RS_1344._col0(Inner),Output:["_col4","_col5","_col6"] + Merge Join Operator [MERGEJOIN_1153] (rows=546042657 width=11) + Conds:RS_21._col1=RS_1346._col0(Inner),Output:["_col4","_col5","_col6"] <-Map 65 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1344] + SHUFFLE [RS_1346] PartitionCols:_col0 - Select Operator [SEL_1336] (rows=458612 width=15) + Select Operator [SEL_1338] (rows=458612 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1327] (rows=458612 width=15) + Filter Operator [FIL_1329] (rows=458612 width=15) predicate:(i_category_id is not null and i_brand_id is not null and i_class_id is not null) Please refer to the previous TableScan [TS_81] <-Reducer 25 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1150] (rows=550076554 width=4) - Conds:RS_1418._col0=RS_1396._col0(Inner),Output:["_col1"] + Merge Join Operator [MERGEJOIN_1152] (rows=550076554 width=4) + Conds:RS_1420._col0=RS_1398._col0(Inner),Output:["_col1"] <-Map 39 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1396] + SHUFFLE [RS_1398] PartitionCols:_col0 - Select Operator [SEL_1395] (rows=1957 width=4) + Select Operator [SEL_1397] (rows=1957 width=4) Output:["_col0"] - Filter Operator [FIL_1394] (rows=1957 width=8) + Filter Operator [FIL_1396] 
(rows=1957 width=8) predicate:d_year BETWEEN 1999 AND 2001 TableScan [TS_12] (rows=73049 width=8) default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1418] + SHUFFLE [RS_1420] PartitionCols:_col0 - Select Operator [SEL_1417] (rows=550076554 width=7) + Select Operator [SEL_1419] (rows=550076554 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_1416] (rows=550076554 width=7) + Filter Operator [FIL_1418] (rows=550076554 width=7) predicate:(ss_sold_date_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_19_d1_d_date_sk_min) AND DynamicValue(RS_19_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_19_d1_d_date_sk_bloom_filter))) TableScan [TS_9] (rows=575995635 width=7) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk"] <-Reducer 40 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1415] - Group By Operator [GBY_1414] (rows=1 width=12) + BROADCAST [RS_1417] + Group By Operator [GBY_1416] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 39 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1410] - Group By Operator [GBY_1406] (rows=1 width=12) + SHUFFLE [RS_1412] + Group By Operator [GBY_1408] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1397] (rows=1957 width=4) + Select Operator [SEL_1399] (rows=1957 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1395] + Please refer to the previous Select Operator [SEL_1397] <-Reducer 44 [CONTAINS] vectorized - Reduce Output Operator [RS_1438] + Reduce Output Operator [RS_1440] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1437] (rows=120960 width=20) + Group By Operator [GBY_1439] (rows=120960 width=20) 
Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1436] (rows=120960 width=20) + Group By Operator [GBY_1438] (rows=120960 width=20) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 42 [SIMPLE_EDGE] SHUFFLE [RS_189] PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_45] (rows=1693440 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col4, _col5, _col6 - Merge Join Operator [MERGEJOIN_1153] (rows=284448361 width=11) - Conds:RS_41._col1=RS_1345._col0(Inner),Output:["_col4","_col5","_col6"] + Merge Join Operator [MERGEJOIN_1155] (rows=284448361 width=11) + Conds:RS_41._col1=RS_1347._col0(Inner),Output:["_col4","_col5","_col6"] <-Map 65 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1345] + SHUFFLE [RS_1347] PartitionCols:_col0 - Select Operator [SEL_1337] (rows=458612 width=15) + Select Operator [SEL_1339] (rows=458612 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1328] (rows=458612 width=15) + Filter Operator [FIL_1330] (rows=458612 width=15) predicate:(i_category_id is not null and i_brand_id is not null and i_class_id is not null) Please refer to the previous TableScan [TS_81] <-Reducer 41 [SIMPLE_EDGE] SHUFFLE [RS_41] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1152] (rows=286549727 width=4) - Conds:RS_1432._col0=RS_1398._col0(Inner),Output:["_col1"] + Merge Join Operator [MERGEJOIN_1154] (rows=286549727 width=4) + Conds:RS_1434._col0=RS_1400._col0(Inner),Output:["_col1"] <-Map 39 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1398] + SHUFFLE [RS_1400] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1395] + Please refer to the previous Select Operator [SEL_1397] <-Map 63 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1432] + SHUFFLE [RS_1434] PartitionCols:_col0 - Select Operator [SEL_1431] (rows=286549727 width=7) + Select Operator [SEL_1433] 
(rows=286549727 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_1430] (rows=286549727 width=7) + Filter Operator [FIL_1432] (rows=286549727 width=7) predicate:(cs_sold_date_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_39_d2_d_date_sk_min) AND DynamicValue(RS_39_d2_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_39_d2_d_date_sk_bloom_filter))) TableScan [TS_29] (rows=287989836 width=7) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk"] <-Reducer 46 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1429] - Group By Operator [GBY_1428] (rows=1 width=12) + BROADCAST [RS_1431] + Group By Operator [GBY_1430] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 39 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1411] - Group By Operator [GBY_1407] (rows=1 width=12) + SHUFFLE [RS_1413] + Group By Operator [GBY_1409] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1399] (rows=1957 width=4) + Select Operator [SEL_1401] (rows=1957 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1395] + Please refer to the previous Select Operator [SEL_1397] <-Reducer 50 [CONTAINS] vectorized - Reduce Output Operator [RS_1452] + Reduce Output Operator [RS_1454] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1451] (rows=120960 width=20) + Group By Operator [GBY_1453] (rows=120960 width=20) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1450] (rows=120960 width=20) + Group By Operator [GBY_1452] (rows=120960 width=20) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 48 [SIMPLE_EDGE] SHUFFLE [RS_210] 
PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_66] (rows=846720 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col4, _col5, _col6 - Merge Join Operator [MERGEJOIN_1155] (rows=142911107 width=11) - Conds:RS_62._col1=RS_1346._col0(Inner),Output:["_col4","_col5","_col6"] + Merge Join Operator [MERGEJOIN_1157] (rows=142911107 width=11) + Conds:RS_62._col1=RS_1348._col0(Inner),Output:["_col4","_col5","_col6"] <-Map 65 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1346] + SHUFFLE [RS_1348] PartitionCols:_col0 - Select Operator [SEL_1338] (rows=458612 width=15) + Select Operator [SEL_1340] (rows=458612 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1329] (rows=458612 width=15) + Filter Operator [FIL_1331] (rows=458612 width=15) predicate:(i_category_id is not null and i_brand_id is not null and i_class_id is not null) Please refer to the previous TableScan [TS_81] <-Reducer 47 [SIMPLE_EDGE] SHUFFLE [RS_62] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1154] (rows=143966864 width=4) - Conds:RS_1446._col0=RS_1400._col0(Inner),Output:["_col1"] + Merge Join Operator [MERGEJOIN_1156] (rows=143966864 width=4) + Conds:RS_1448._col0=RS_1402._col0(Inner),Output:["_col1"] <-Map 39 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1400] + SHUFFLE [RS_1402] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1395] + Please refer to the previous Select Operator [SEL_1397] <-Map 64 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1446] + SHUFFLE [RS_1448] PartitionCols:_col0 - Select Operator [SEL_1445] (rows=143966864 width=7) + Select Operator [SEL_1447] (rows=143966864 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_1444] (rows=143966864 width=7) + Filter Operator [FIL_1446] (rows=143966864 width=7) predicate:(ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_60_d3_d_date_sk_min) AND DynamicValue(RS_60_d3_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, 
DynamicValue(RS_60_d3_d_date_sk_bloom_filter))) TableScan [TS_50] (rows=144002668 width=7) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk"] <-Reducer 52 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1443] - Group By Operator [GBY_1442] (rows=1 width=12) + BROADCAST [RS_1445] + Group By Operator [GBY_1444] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 39 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1412] - Group By Operator [GBY_1408] (rows=1 width=12) + SHUFFLE [RS_1414] + Group By Operator [GBY_1410] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1401] (rows=1957 width=4) + Select Operator [SEL_1403] (rows=1957 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1395] + Please refer to the previous Select Operator [SEL_1397] <-Reducer 59 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1377] - Select Operator [SEL_1376] (rows=1 width=112) + PARTITION_ONLY_SHUFFLE [RS_1379] + Select Operator [SEL_1378] (rows=1 width=112) Output:["_col0"] - Filter Operator [FIL_1375] (rows=1 width=120) + Filter Operator [FIL_1377] (rows=1 width=120) predicate:(_col0 is not null and _col1 is not null) - Group By Operator [GBY_1374] (rows=1 width=120) + Group By Operator [GBY_1376] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] <-Union 58 [CUSTOM_SIMPLE_EDGE] <-Reducer 57 [CONTAINS] - Reduce Output Operator [RS_1259] - Group By Operator [GBY_1258] (rows=1 width=120) + Reduce Output Operator [RS_1261] + Group By Operator [GBY_1260] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1257] (rows=980593145 width=112) + Select Operator [SEL_1259] (rows=980593145 width=112) 
Output:["_col0"] - Select Operator [SEL_1255] (rows=550076554 width=110) + Select Operator [SEL_1257] (rows=550076554 width=110) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1254] (rows=550076554 width=110) - Conds:RS_1461._col0=RS_1404._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_1256] (rows=550076554 width=110) + Conds:RS_1463._col0=RS_1406._col0(Inner),Output:["_col1","_col2"] <-Map 39 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1404] + SHUFFLE [RS_1406] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1395] + Please refer to the previous Select Operator [SEL_1397] <-Map 66 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1461] + SHUFFLE [RS_1463] PartitionCols:_col0 - Select Operator [SEL_1459] (rows=550076554 width=114) + Select Operator [SEL_1461] (rows=550076554 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1458] (rows=550076554 width=114) + Filter Operator [FIL_1460] (rows=550076554 width=114) predicate:(ss_sold_date_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_106_date_dim_d_date_sk_min) AND DynamicValue(RS_106_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_106_date_dim_d_date_sk_bloom_filter))) TableScan [TS_99] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_quantity","ss_list_price"] <-Reducer 56 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1457] - Group By Operator [GBY_1456] (rows=1 width=12) + BROADCAST [RS_1459] + Group By Operator [GBY_1458] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 39 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1413] - Group By Operator [GBY_1409] (rows=1 width=12) + SHUFFLE [RS_1415] + Group By Operator [GBY_1411] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, 
expectedEntries=1000000)"] - Select Operator [SEL_1403] (rows=1957 width=4) + Select Operator [SEL_1405] (rows=1957 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1395] + Please refer to the previous Select Operator [SEL_1397] <-Reducer 69 [CONTAINS] - Reduce Output Operator [RS_1277] - Group By Operator [GBY_1276] (rows=1 width=120) + Reduce Output Operator [RS_1279] + Group By Operator [GBY_1278] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1275] (rows=980593145 width=112) + Select Operator [SEL_1277] (rows=980593145 width=112) Output:["_col0"] - Select Operator [SEL_1273] (rows=286549727 width=115) + Select Operator [SEL_1275] (rows=286549727 width=115) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1272] (rows=286549727 width=115) - Conds:RS_1476._col0=RS_1467._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_1274] (rows=286549727 width=115) + Conds:RS_1478._col0=RS_1469._col0(Inner),Output:["_col1","_col2"] <-Map 71 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1467] + PARTITION_ONLY_SHUFFLE [RS_1469] PartitionCols:_col0 - Select Operator [SEL_1464] (rows=1957 width=4) + Select Operator [SEL_1466] (rows=1957 width=4) Output:["_col0"] - Filter Operator [FIL_1463] (rows=1957 width=8) + Filter Operator [FIL_1465] (rows=1957 width=8) predicate:d_year BETWEEN 1998 AND 2000 TableScan [TS_112] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] <-Map 67 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1476] + SHUFFLE [RS_1478] PartitionCols:_col0 - Select Operator [SEL_1474] (rows=286549727 width=119) + Select Operator [SEL_1476] (rows=286549727 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1473] (rows=286549727 width=119) + Filter Operator [FIL_1475] (rows=286549727 width=119) predicate:(cs_sold_date_sk is not null and cs_sold_date_sk BETWEEN 
DynamicValue(RS_116_date_dim_d_date_sk_min) AND DynamicValue(RS_116_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_116_date_dim_d_date_sk_bloom_filter))) TableScan [TS_109] (rows=287989836 width=119) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_quantity","cs_list_price"] <-Reducer 72 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1472] - Group By Operator [GBY_1471] (rows=1 width=12) + BROADCAST [RS_1474] + Group By Operator [GBY_1473] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 71 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1470] - Group By Operator [GBY_1469] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_1472] + Group By Operator [GBY_1471] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1466] (rows=1957 width=4) + Select Operator [SEL_1468] (rows=1957 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1464] + Please refer to the previous Select Operator [SEL_1466] <-Reducer 75 [CONTAINS] - Reduce Output Operator [RS_1295] - Group By Operator [GBY_1294] (rows=1 width=120) + Reduce Output Operator [RS_1297] + Group By Operator [GBY_1296] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1293] (rows=980593145 width=112) + Select Operator [SEL_1295] (rows=980593145 width=112) Output:["_col0"] - Select Operator [SEL_1291] (rows=143966864 width=115) + Select Operator [SEL_1293] (rows=143966864 width=115) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1290] (rows=143966864 width=115) - Conds:RS_1491._col0=RS_1482._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_1292] (rows=143966864 width=115) + 
Conds:RS_1493._col0=RS_1484._col0(Inner),Output:["_col1","_col2"] <-Map 77 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1482] + PARTITION_ONLY_SHUFFLE [RS_1484] PartitionCols:_col0 - Select Operator [SEL_1479] (rows=1957 width=4) + Select Operator [SEL_1481] (rows=1957 width=4) Output:["_col0"] - Filter Operator [FIL_1478] (rows=1957 width=8) + Filter Operator [FIL_1480] (rows=1957 width=8) predicate:d_year BETWEEN 1998 AND 2000 TableScan [TS_123] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] <-Map 73 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1491] + SHUFFLE [RS_1493] PartitionCols:_col0 - Select Operator [SEL_1489] (rows=143966864 width=119) + Select Operator [SEL_1491] (rows=143966864 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1488] (rows=143966864 width=119) + Filter Operator [FIL_1490] (rows=143966864 width=119) predicate:(ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_127_date_dim_d_date_sk_min) AND DynamicValue(RS_127_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_127_date_dim_d_date_sk_bloom_filter))) TableScan [TS_120] (rows=144002668 width=119) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_quantity","ws_list_price"] <-Reducer 78 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1487] - Group By Operator [GBY_1486] (rows=1 width=12) + BROADCAST [RS_1489] + Group By Operator [GBY_1488] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 77 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1485] - Group By Operator [GBY_1484] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_1487] + Group By Operator [GBY_1486] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator 
[SEL_1481] (rows=1957 width=4) + Select Operator [SEL_1483] (rows=1957 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1479] + Please refer to the previous Select Operator [SEL_1481] <-Reducer 22 [CONTAINS] - Reduce Output Operator [RS_1211] + Reduce Output Operator [RS_1213] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_1210] (rows=304320 width=231) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3, 0L - Top N Key Operator [TNK_1209] (rows=121728 width=221) - keys:_col0, _col1, _col2, _col3, 0L,top n:100 - Select Operator [SEL_1207] (rows=40576 width=219) + Top N Key Operator [TNK_1212] (rows=304320 width=231) + keys:_col0, _col1, _col2, _col3,top n:100 + Group By Operator [GBY_1211] (rows=304320 width=231) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3, 0L + Select Operator [SEL_1209] (rows=40576 width=219) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_1206] (rows=40576 width=244) + Filter Operator [FIL_1208] (rows=40576 width=244) predicate:(_col3 > _col5) - Merge Join Operator [MERGEJOIN_1205] (rows=121728 width=244) + Merge Join Operator [MERGEJOIN_1207] (rows=121728 width=244) Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 21 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1389] - Filter Operator [FIL_1388] (rows=121728 width=132) + PARTITION_ONLY_SHUFFLE [RS_1391] + Filter Operator [FIL_1390] (rows=121728 width=132) predicate:_col3 is not null - Group By Operator [GBY_1387] (rows=121728 width=132) + Group By Operator [GBY_1389] (rows=121728 width=132) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 20 [SIMPLE_EDGE] SHUFFLE [RS_382] @@ 
-710,185 +710,185 @@ Stage-0 Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col3)","count()"],keys:_col0, _col1, _col2 Select Operator [SEL_379] (rows=143966864 width=127) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_1184] (rows=143966864 width=127) - Conds:RS_376._col1=RS_1341._col0(Inner),Output:["_col2","_col3","_col7","_col8","_col9"] + Merge Join Operator [MERGEJOIN_1186] (rows=143966864 width=127) + Conds:RS_376._col1=RS_1343._col0(Inner),Output:["_col2","_col3","_col7","_col8","_col9"] <-Map 65 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1341] + SHUFFLE [RS_1343] PartitionCols:_col0 - Select Operator [SEL_1332] (rows=462000 width=15) + Select Operator [SEL_1334] (rows=462000 width=15) Output:["_col0","_col1","_col2","_col3"] Please refer to the previous TableScan [TS_81] <-Reducer 19 [SIMPLE_EDGE] SHUFFLE [RS_376] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1183] (rows=143966864 width=119) + Merge Join Operator [MERGEJOIN_1185] (rows=143966864 width=119) Conds:RS_373._col1=RS_374._col0(Inner),Output:["_col1","_col2","_col3"] <-Reducer 18 [SIMPLE_EDGE] SHUFFLE [RS_373] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1175] (rows=143966864 width=119) - Conds:RS_1382._col0=RS_1308._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_1177] (rows=143966864 width=119) + Conds:RS_1384._col0=RS_1310._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 10 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1308] + PARTITION_ONLY_SHUFFLE [RS_1310] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1303] + Please refer to the previous Select Operator [SEL_1305] <-Map 80 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1382] + SHUFFLE [RS_1384] PartitionCols:_col0 - Select Operator [SEL_1381] (rows=143966864 width=123) + Select Operator [SEL_1383] (rows=143966864 width=123) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1380] (rows=143966864 width=123) + 
Filter Operator [FIL_1382] (rows=143966864 width=123) predicate:(ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_371_date_dim_d_date_sk_min) AND DynamicValue(RS_371_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_371_date_dim_d_date_sk_bloom_filter))) TableScan [TS_287] (rows=144002668 width=123) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_quantity","ws_list_price"] <-Reducer 23 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1379] - Group By Operator [GBY_1378] (rows=1 width=12) + BROADCAST [RS_1381] + Group By Operator [GBY_1380] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1315] - Group By Operator [GBY_1312] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_1317] + Group By Operator [GBY_1314] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1309] (rows=50 width=4) + Select Operator [SEL_1311] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1303] + Please refer to the previous Select Operator [SEL_1305] <-Reducer 38 [SIMPLE_EDGE] SHUFFLE [RS_374] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_1182] (rows=729 width=4) - Conds:RS_1348._col1, _col2, _col3=RS_1386._col0, _col1, _col2(Inner),Output:["_col0"] + Merge Join Operator [MERGEJOIN_1184] (rows=729 width=4) + Conds:RS_1350._col1, _col2, _col3=RS_1388._col0, _col1, _col2(Inner),Output:["_col0"] <-Map 65 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1348] + SHUFFLE [RS_1350] PartitionCols:_col1, _col2, _col3 - Select Operator [SEL_1342] (rows=458612 width=15) + Select Operator [SEL_1344] (rows=458612 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1333] 
(rows=458612 width=15) + Filter Operator [FIL_1335] (rows=458612 width=15) predicate:(i_category_id is not null and i_brand_id is not null and i_class_id is not null) Please refer to the previous TableScan [TS_81] <-Reducer 37 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1386] + SHUFFLE [RS_1388] PartitionCols:_col0, _col1, _col2 - Select Operator [SEL_1385] (rows=1 width=12) + Select Operator [SEL_1387] (rows=1 width=12) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1384] (rows=1 width=20) + Filter Operator [FIL_1386] (rows=1 width=20) predicate:(_col3 = 3L) - Group By Operator [GBY_1383] (rows=120960 width=20) + Group By Operator [GBY_1385] (rows=120960 width=20) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Union 36 [SIMPLE_EDGE] <-Reducer 35 [CONTAINS] vectorized - Reduce Output Operator [RS_1427] + Reduce Output Operator [RS_1429] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1426] (rows=120960 width=20) + Group By Operator [GBY_1428] (rows=120960 width=20) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1425] (rows=120960 width=20) + Group By Operator [GBY_1427] (rows=120960 width=20) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 26 [SIMPLE_EDGE] SHUFFLE [RS_313] PartitionCols:_col0, _col1, _col2 Please refer to the previous Group By Operator [GBY_25] <-Reducer 45 [CONTAINS] vectorized - Reduce Output Operator [RS_1441] + Reduce Output Operator [RS_1443] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1440] (rows=120960 width=20) + Group By Operator [GBY_1442] (rows=120960 width=20) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1439] (rows=120960 width=20) + Group By Operator [GBY_1441] (rows=120960 width=20) 
Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 42 [SIMPLE_EDGE] SHUFFLE [RS_333] PartitionCols:_col0, _col1, _col2 Please refer to the previous Group By Operator [GBY_45] <-Reducer 51 [CONTAINS] vectorized - Reduce Output Operator [RS_1455] + Reduce Output Operator [RS_1457] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1454] (rows=120960 width=20) + Group By Operator [GBY_1456] (rows=120960 width=20) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1453] (rows=120960 width=20) + Group By Operator [GBY_1455] (rows=120960 width=20) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 48 [SIMPLE_EDGE] SHUFFLE [RS_354] PartitionCols:_col0, _col1, _col2 Please refer to the previous Group By Operator [GBY_66] <-Reducer 62 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1393] - Select Operator [SEL_1392] (rows=1 width=112) + PARTITION_ONLY_SHUFFLE [RS_1395] + Select Operator [SEL_1394] (rows=1 width=112) Output:["_col0"] - Filter Operator [FIL_1391] (rows=1 width=120) + Filter Operator [FIL_1393] (rows=1 width=120) predicate:(_col0 is not null and _col1 is not null) - Group By Operator [GBY_1390] (rows=1 width=120) + Group By Operator [GBY_1392] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] <-Union 61 [CUSTOM_SIMPLE_EDGE] <-Reducer 60 [CONTAINS] - Reduce Output Operator [RS_1265] - Group By Operator [GBY_1264] (rows=1 width=120) + Reduce Output Operator [RS_1267] + Group By Operator [GBY_1266] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1263] (rows=980593145 width=112) + Select Operator [SEL_1265] (rows=980593145 width=112) Output:["_col0"] - Select Operator [SEL_1261] (rows=550076554 width=110) + Select Operator 
[SEL_1263] (rows=550076554 width=110) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1260] (rows=550076554 width=110) - Conds:RS_1462._col0=RS_1405._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_1262] (rows=550076554 width=110) + Conds:RS_1464._col0=RS_1407._col0(Inner),Output:["_col1","_col2"] <-Map 39 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1405] + SHUFFLE [RS_1407] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1395] + Please refer to the previous Select Operator [SEL_1397] <-Map 66 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1462] + SHUFFLE [RS_1464] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1459] + Please refer to the previous Select Operator [SEL_1461] <-Reducer 70 [CONTAINS] - Reduce Output Operator [RS_1283] - Group By Operator [GBY_1282] (rows=1 width=120) + Reduce Output Operator [RS_1285] + Group By Operator [GBY_1284] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1281] (rows=980593145 width=112) + Select Operator [SEL_1283] (rows=980593145 width=112) Output:["_col0"] - Select Operator [SEL_1279] (rows=286549727 width=115) + Select Operator [SEL_1281] (rows=286549727 width=115) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1278] (rows=286549727 width=115) - Conds:RS_1477._col0=RS_1468._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_1280] (rows=286549727 width=115) + Conds:RS_1479._col0=RS_1470._col0(Inner),Output:["_col1","_col2"] <-Map 71 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1468] + PARTITION_ONLY_SHUFFLE [RS_1470] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1464] + Please refer to the previous Select Operator [SEL_1466] <-Map 67 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1477] + SHUFFLE [RS_1479] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1474] + Please refer to the previous Select Operator [SEL_1476] 
<-Reducer 76 [CONTAINS] - Reduce Output Operator [RS_1301] - Group By Operator [GBY_1300] (rows=1 width=120) + Reduce Output Operator [RS_1303] + Group By Operator [GBY_1302] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1299] (rows=980593145 width=112) + Select Operator [SEL_1301] (rows=980593145 width=112) Output:["_col0"] - Select Operator [SEL_1297] (rows=143966864 width=115) + Select Operator [SEL_1299] (rows=143966864 width=115) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1296] (rows=143966864 width=115) - Conds:RS_1492._col0=RS_1483._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_1298] (rows=143966864 width=115) + Conds:RS_1494._col0=RS_1485._col0(Inner),Output:["_col1","_col2"] <-Map 77 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1483] + PARTITION_ONLY_SHUFFLE [RS_1485] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1479] + Please refer to the previous Select Operator [SEL_1481] <-Map 73 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1492] + SHUFFLE [RS_1494] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1489] + Please refer to the previous Select Operator [SEL_1491] <-Reducer 6 [CONTAINS] - Reduce Output Operator [RS_1197] + Reduce Output Operator [RS_1199] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_1196] (rows=304320 width=231) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3, 0L - Top N Key Operator [TNK_1195] (rows=121728 width=221) - keys:_col0, _col1, _col2, _col3, 0L,top n:100 - Select Operator [SEL_1193] (rows=40576 width=221) + Top N Key Operator [TNK_1198] (rows=304320 width=231) + keys:_col0, _col1, _col2, _col3,top n:100 + Group By Operator [GBY_1197] (rows=304320 width=231) + 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3, 0L + Select Operator [SEL_1195] (rows=40576 width=221) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_1192] (rows=40576 width=244) + Filter Operator [FIL_1194] (rows=40576 width=244) predicate:(_col3 > _col5) - Merge Join Operator [MERGEJOIN_1191] (rows=121728 width=244) + Merge Join Operator [MERGEJOIN_1193] (rows=121728 width=244) Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 5 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1351] - Filter Operator [FIL_1350] (rows=121728 width=132) + PARTITION_ONLY_SHUFFLE [RS_1353] + Filter Operator [FIL_1352] (rows=121728 width=132) predicate:_col3 is not null - Group By Operator [GBY_1349] (rows=121728 width=132) + Group By Operator [GBY_1351] (rows=121728 width=132) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_95] @@ -897,165 +897,165 @@ Stage-0 Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col3)","count()"],keys:_col0, _col1, _col2 Select Operator [SEL_92] (rows=550076554 width=122) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_1158] (rows=550076554 width=122) - Conds:RS_89._col1=RS_1334._col0(Inner),Output:["_col2","_col3","_col7","_col8","_col9"] + Merge Join Operator [MERGEJOIN_1160] (rows=550076554 width=122) + Conds:RS_89._col1=RS_1336._col0(Inner),Output:["_col2","_col3","_col7","_col8","_col9"] <-Map 65 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1334] + SHUFFLE [RS_1336] PartitionCols:_col0 - Select Operator [SEL_1325] (rows=462000 width=15) + Select Operator [SEL_1327] (rows=462000 width=15) Output:["_col0","_col1","_col2","_col3"] Please refer to the previous TableScan [TS_81] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_89] 
PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1157] (rows=550076554 width=114) + Merge Join Operator [MERGEJOIN_1159] (rows=550076554 width=114) Conds:RS_86._col1=RS_87._col0(Inner),Output:["_col1","_col2","_col3"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_86] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1149] (rows=550076554 width=114) - Conds:RS_1320._col0=RS_1304._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_1151] (rows=550076554 width=114) + Conds:RS_1322._col0=RS_1306._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 10 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1304] + PARTITION_ONLY_SHUFFLE [RS_1306] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1303] + Please refer to the previous Select Operator [SEL_1305] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1320] + SHUFFLE [RS_1322] PartitionCols:_col0 - Select Operator [SEL_1319] (rows=550076554 width=118) + Select Operator [SEL_1321] (rows=550076554 width=118) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1318] (rows=550076554 width=118) + Filter Operator [FIL_1320] (rows=550076554 width=118) predicate:(ss_sold_date_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_84_date_dim_d_date_sk_min) AND DynamicValue(RS_84_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_84_date_dim_d_date_sk_bloom_filter))) TableScan [TS_0] (rows=575995635 width=118) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_quantity","ss_list_price"] <-Reducer 11 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1317] - Group By Operator [GBY_1316] (rows=1 width=12) + BROADCAST [RS_1319] + Group By Operator [GBY_1318] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1313] - Group By 
Operator [GBY_1310] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_1315] + Group By Operator [GBY_1312] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1305] (rows=50 width=4) + Select Operator [SEL_1307] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1303] + Please refer to the previous Select Operator [SEL_1305] <-Reducer 30 [SIMPLE_EDGE] SHUFFLE [RS_87] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_1156] (rows=729 width=4) - Conds:RS_1343._col1, _col2, _col3=RS_1324._col0, _col1, _col2(Inner),Output:["_col0"] + Merge Join Operator [MERGEJOIN_1158] (rows=729 width=4) + Conds:RS_1345._col1, _col2, _col3=RS_1326._col0, _col1, _col2(Inner),Output:["_col0"] <-Map 65 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1343] + SHUFFLE [RS_1345] PartitionCols:_col1, _col2, _col3 - Select Operator [SEL_1335] (rows=458612 width=15) + Select Operator [SEL_1337] (rows=458612 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1326] (rows=458612 width=15) + Filter Operator [FIL_1328] (rows=458612 width=15) predicate:(i_category_id is not null and i_brand_id is not null and i_class_id is not null) Please refer to the previous TableScan [TS_81] <-Reducer 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1324] + SHUFFLE [RS_1326] PartitionCols:_col0, _col1, _col2 - Select Operator [SEL_1323] (rows=1 width=12) + Select Operator [SEL_1325] (rows=1 width=12) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1322] (rows=1 width=20) + Filter Operator [FIL_1324] (rows=1 width=20) predicate:(_col3 = 3L) - Group By Operator [GBY_1321] (rows=120960 width=20) + Group By Operator [GBY_1323] (rows=120960 width=20) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Union 28 [SIMPLE_EDGE] <-Reducer 27 [CONTAINS] vectorized - Reduce Output Operator [RS_1421] + 
Reduce Output Operator [RS_1423] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1420] (rows=120960 width=20) + Group By Operator [GBY_1422] (rows=120960 width=20) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1419] (rows=120960 width=20) + Group By Operator [GBY_1421] (rows=120960 width=20) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 26 [SIMPLE_EDGE] SHUFFLE [RS_26] PartitionCols:_col0, _col1, _col2 Please refer to the previous Group By Operator [GBY_25] <-Reducer 43 [CONTAINS] vectorized - Reduce Output Operator [RS_1435] + Reduce Output Operator [RS_1437] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1434] (rows=120960 width=20) + Group By Operator [GBY_1436] (rows=120960 width=20) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1433] (rows=120960 width=20) + Group By Operator [GBY_1435] (rows=120960 width=20) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 42 [SIMPLE_EDGE] SHUFFLE [RS_46] PartitionCols:_col0, _col1, _col2 Please refer to the previous Group By Operator [GBY_45] <-Reducer 49 [CONTAINS] vectorized - Reduce Output Operator [RS_1449] + Reduce Output Operator [RS_1451] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1448] (rows=120960 width=20) + Group By Operator [GBY_1450] (rows=120960 width=20) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1447] (rows=120960 width=20) + Group By Operator [GBY_1449] (rows=120960 width=20) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 48 [SIMPLE_EDGE] SHUFFLE [RS_67] PartitionCols:_col0, _col1, _col2 Please refer to the previous 
Group By Operator [GBY_66] <-Reducer 55 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1355] - Select Operator [SEL_1354] (rows=1 width=112) + PARTITION_ONLY_SHUFFLE [RS_1357] + Select Operator [SEL_1356] (rows=1 width=112) Output:["_col0"] - Filter Operator [FIL_1353] (rows=1 width=120) + Filter Operator [FIL_1355] (rows=1 width=120) predicate:(_col0 is not null and _col1 is not null) - Group By Operator [GBY_1352] (rows=1 width=120) + Group By Operator [GBY_1354] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] <-Union 54 [CUSTOM_SIMPLE_EDGE] <-Reducer 53 [CONTAINS] - Reduce Output Operator [RS_1253] - Group By Operator [GBY_1252] (rows=1 width=120) + Reduce Output Operator [RS_1255] + Group By Operator [GBY_1254] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1251] (rows=980593145 width=112) + Select Operator [SEL_1253] (rows=980593145 width=112) Output:["_col0"] - Select Operator [SEL_1249] (rows=550076554 width=110) + Select Operator [SEL_1251] (rows=550076554 width=110) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1248] (rows=550076554 width=110) - Conds:RS_1460._col0=RS_1402._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_1250] (rows=550076554 width=110) + Conds:RS_1462._col0=RS_1404._col0(Inner),Output:["_col1","_col2"] <-Map 39 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1402] + SHUFFLE [RS_1404] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1395] + Please refer to the previous Select Operator [SEL_1397] <-Map 66 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1460] + SHUFFLE [RS_1462] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1459] + Please refer to the previous Select Operator [SEL_1461] <-Reducer 68 [CONTAINS] - Reduce Output Operator [RS_1271] - Group By Operator [GBY_1270] (rows=1 width=120) + Reduce Output Operator [RS_1273] + Group By Operator 
[GBY_1272] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1269] (rows=980593145 width=112) + Select Operator [SEL_1271] (rows=980593145 width=112) Output:["_col0"] - Select Operator [SEL_1267] (rows=286549727 width=115) + Select Operator [SEL_1269] (rows=286549727 width=115) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1266] (rows=286549727 width=115) - Conds:RS_1475._col0=RS_1465._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_1268] (rows=286549727 width=115) + Conds:RS_1477._col0=RS_1467._col0(Inner),Output:["_col1","_col2"] <-Map 71 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1465] + PARTITION_ONLY_SHUFFLE [RS_1467] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1464] + Please refer to the previous Select Operator [SEL_1466] <-Map 67 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1475] + SHUFFLE [RS_1477] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1474] + Please refer to the previous Select Operator [SEL_1476] <-Reducer 74 [CONTAINS] - Reduce Output Operator [RS_1289] - Group By Operator [GBY_1288] (rows=1 width=120) + Reduce Output Operator [RS_1291] + Group By Operator [GBY_1290] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1287] (rows=980593145 width=112) + Select Operator [SEL_1289] (rows=980593145 width=112) Output:["_col0"] - Select Operator [SEL_1285] (rows=143966864 width=115) + Select Operator [SEL_1287] (rows=143966864 width=115) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1284] (rows=143966864 width=115) - Conds:RS_1490._col0=RS_1480._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_1286] (rows=143966864 width=115) + Conds:RS_1492._col0=RS_1482._col0(Inner),Output:["_col1","_col2"] <-Map 77 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1480] + PARTITION_ONLY_SHUFFLE [RS_1482] 
PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1479] + Please refer to the previous Select Operator [SEL_1481] <-Map 73 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1490] + SHUFFLE [RS_1492] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1489] + Please refer to the previous Select Operator [SEL_1491] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query15.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query15.q.out index 5268ed3ecf..6de127d824 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query15.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query15.q.out @@ -62,81 +62,81 @@ Stage-0 limit:100 Stage-1 Reducer 5 vectorized - File Output Operator [FS_97] - Limit [LIM_96] (rows=100 width=201) + File Output Operator [FS_102] + Limit [LIM_101] (rows=100 width=201) Number of rows:100 - Select Operator [SEL_95] (rows=10141 width=201) + Select Operator [SEL_100] (rows=10141 width=201) Output:["_col0","_col1"] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_94] - Group By Operator [GBY_93] (rows=10141 width=201) + SHUFFLE [RS_99] + Group By Operator [GBY_98] (rows=10141 width=201) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col0 Group By Operator [GBY_23] (rows=2403417 width=201) Output:["_col0","_col1"],aggregations:["sum(_col8)"],keys:_col3 - Top N Key Operator [TNK_43] (rows=285117831 width=212) - keys:_col3,top n:100 - Select Operator [SEL_22] (rows=285117831 width=212) - Output:["_col3","_col8"] + Select Operator [SEL_22] (rows=285117831 width=212) + Output:["_col3","_col8"] + Top N Key Operator [TNK_46] (rows=285117831 width=212) + keys:_col3,top n:100 Filter Operator [FIL_21] (rows=285117831 width=212) predicate:(_col9 or _col4 or _col5) - Merge Join Operator [MERGEJOIN_76] (rows=285117831 width=212) + Merge Join Operator [MERGEJOIN_81] (rows=285117831 width=212) 
Conds:RS_18._col0=RS_19._col1(Inner),Output:["_col3","_col4","_col5","_col8","_col9"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_74] (rows=80000000 width=101) - Conds:RS_79._col1=RS_81._col0(Inner),Output:["_col0","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_79] (rows=80000000 width=101) + Conds:RS_84._col1=RS_86._col0(Inner),Output:["_col0","_col3","_col4","_col5"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_79] + SHUFFLE [RS_84] PartitionCols:_col1 - Select Operator [SEL_78] (rows=80000000 width=8) + Select Operator [SEL_83] (rows=80000000 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_77] (rows=80000000 width=8) + Filter Operator [FIL_82] (rows=80000000 width=8) predicate:c_current_addr_sk is not null TableScan [TS_0] (rows=80000000 width=8) default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk"] <-Map 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_81] + SHUFFLE [RS_86] PartitionCols:_col0 - Select Operator [SEL_80] (rows=40000000 width=101) + Select Operator [SEL_85] (rows=40000000 width=101) Output:["_col0","_col1","_col2","_col3"] TableScan [TS_3] (rows=40000000 width=179) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_zip"] <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_19] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_75] (rows=285117831 width=119) - Conds:RS_92._col0=RS_84._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_80] (rows=285117831 width=119) + Conds:RS_97._col0=RS_89._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_84] + SHUFFLE [RS_89] PartitionCols:_col0 - Select Operator [SEL_83] (rows=130 width=4) + Select Operator [SEL_88] (rows=130 width=4) Output:["_col0"] - Filter Operator [FIL_82] (rows=130 width=12) + Filter Operator [FIL_87] (rows=130 width=12) predicate:((d_year = 2000) and (d_qoy = 2)) 
TableScan [TS_8] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_92] + SHUFFLE [RS_97] PartitionCols:_col0 - Select Operator [SEL_91] (rows=285117831 width=123) + Select Operator [SEL_96] (rows=285117831 width=123) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_90] (rows=285117831 width=119) + Filter Operator [FIL_95] (rows=285117831 width=119) predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_12_date_dim_d_date_sk_min) AND DynamicValue(RS_12_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_12_date_dim_d_date_sk_bloom_filter))) TableScan [TS_5] (rows=287989836 width=119) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_sales_price"] <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_89] - Group By Operator [GBY_88] (rows=1 width=12) + BROADCAST [RS_94] + Group By Operator [GBY_93] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_87] - Group By Operator [GBY_86] (rows=1 width=12) + SHUFFLE [RS_92] + Group By Operator [GBY_91] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_85] (rows=130 width=4) + Select Operator [SEL_90] (rows=130 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_83] + Please refer to the previous Select Operator [SEL_88] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query17.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query17.q.out index d96222d9e1..1a795279d1 100644 --- 
ql/src/test/results/clientpositive/perf/tez/constraints/query17.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query17.q.out @@ -122,144 +122,144 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_248] - Limit [LIM_247] (rows=100 width=466) + File Output Operator [FS_253] + Limit [LIM_252] (rows=100 width=466) Number of rows:100 - Select Operator [SEL_246] (rows=97302218301 width=466) + Select Operator [SEL_251] (rows=97302218301 width=466) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_245] - Select Operator [SEL_244] (rows=97302218301 width=466) + SHUFFLE [RS_250] + Select Operator [SEL_249] (rows=97302218301 width=466) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - Group By Operator [GBY_243] (rows=97302218301 width=466) + Group By Operator [GBY_248] (rows=97302218301 width=466) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","count(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)","sum(VALUE._col7)","count(VALUE._col8)","sum(VALUE._col9)","sum(VALUE._col10)","sum(VALUE._col11)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_48] PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_47] (rows=97302218301 width=466) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["count(_col3)","sum(_col3)","sum(_col7)","sum(_col6)","count(_col4)","sum(_col4)","sum(_col9)","sum(_col8)","count(_col5)","sum(_col5)","sum(_col11)","sum(_col10)"],keys:_col0, _col1, _col2 - Top N Key Operator [TNK_94] (rows=97302218301 
width=381) - keys:_col0, _col1, _col2,top n:100 - Select Operator [SEL_45] (rows=97302218301 width=381) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - Merge Join Operator [MERGEJOIN_210] (rows=97302218301 width=381) - Conds:RS_42._col6=RS_242._col0(Inner),Output:["_col3","_col10","_col16","_col19","_col21","_col22"] + Select Operator [SEL_45] (rows=97302218301 width=381) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Top N Key Operator [TNK_97] (rows=97302218301 width=381) + keys:_col21, _col22, _col19,top n:100 + Merge Join Operator [MERGEJOIN_215] (rows=97302218301 width=381) + Conds:RS_42._col6=RS_247._col0(Inner),Output:["_col3","_col10","_col16","_col19","_col21","_col22"] <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_242] + SHUFFLE [RS_247] PartitionCols:_col0 - Select Operator [SEL_241] (rows=462000 width=288) + Select Operator [SEL_246] (rows=462000 width=288) Output:["_col0","_col1","_col2"] TableScan [TS_31] (rows=462000 width=288) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_item_desc"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_42] PartitionCols:_col6 - Merge Join Operator [MERGEJOIN_209] (rows=97302218301 width=101) - Conds:RS_39._col8=RS_240._col0(Inner),Output:["_col3","_col6","_col10","_col16","_col19"] + Merge Join Operator [MERGEJOIN_214] (rows=97302218301 width=101) + Conds:RS_39._col8=RS_245._col0(Inner),Output:["_col3","_col6","_col10","_col16","_col19"] <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_240] + SHUFFLE [RS_245] PartitionCols:_col0 - Select Operator [SEL_239] (rows=1704 width=90) + Select Operator [SEL_244] (rows=1704 width=90) Output:["_col0","_col1"] TableScan [TS_29] (rows=1704 width=90) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_state"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_39] PartitionCols:_col8 - Merge Join Operator [MERGEJOIN_208] 
(rows=97302218301 width=19) + Merge Join Operator [MERGEJOIN_213] (rows=97302218301 width=19) Conds:RS_36._col1, _col2=RS_37._col9, _col8(Inner),Output:["_col3","_col6","_col8","_col10","_col16"] <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_37] PartitionCols:_col9, _col8 - Merge Join Operator [MERGEJOIN_207] (rows=478292911 width=23) + Merge Join Operator [MERGEJOIN_212] (rows=478292911 width=23) Conds:RS_25._col2, _col1, _col4=RS_26._col2, _col1, _col3(Inner),Output:["_col1","_col3","_col5","_col8","_col9","_col11"] <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_25] PartitionCols:_col2, _col1, _col4 - Merge Join Operator [MERGEJOIN_205] (rows=501694138 width=19) - Conds:RS_235._col0=RS_219._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_210] (rows=501694138 width=19) + Conds:RS_240._col0=RS_224._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_219] + PARTITION_ONLY_SHUFFLE [RS_224] PartitionCols:_col0 - Select Operator [SEL_215] (rows=101 width=4) + Select Operator [SEL_220] (rows=101 width=4) Output:["_col0"] - Filter Operator [FIL_212] (rows=101 width=94) + Filter Operator [FIL_217] (rows=101 width=94) predicate:(d_quarter_name = '2000Q1') TableScan [TS_3] (rows=73049 width=94) default@date_dim,d3,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_quarter_name"] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_235] + SHUFFLE [RS_240] PartitionCols:_col0 - Select Operator [SEL_234] (rows=501694138 width=23) + Select Operator [SEL_239] (rows=501694138 width=23) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_233] (rows=501694138 width=23) + Filter Operator [FIL_238] (rows=501694138 width=23) predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_23_d1_d_date_sk_min) AND DynamicValue(RS_23_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, 
DynamicValue(RS_23_d1_d_date_sk_bloom_filter))) TableScan [TS_6] (rows=575995635 width=23) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_quantity"] <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_232] - Group By Operator [GBY_231] (rows=1 width=12) + BROADCAST [RS_237] + Group By Operator [GBY_236] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_225] - Group By Operator [GBY_223] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_230] + Group By Operator [GBY_228] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_220] (rows=101 width=4) + Select Operator [SEL_225] (rows=101 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_215] + Please refer to the previous Select Operator [SEL_220] <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_26] PartitionCols:_col2, _col1, _col3 - Merge Join Operator [MERGEJOIN_206] (rows=53632139 width=15) - Conds:RS_238._col0=RS_221._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_211] (rows=53632139 width=15) + Conds:RS_243._col0=RS_226._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_221] + PARTITION_ONLY_SHUFFLE [RS_226] PartitionCols:_col0 - Select Operator [SEL_216] (rows=304 width=4) + Select Operator [SEL_221] (rows=304 width=4) Output:["_col0"] - Filter Operator [FIL_213] (rows=304 width=94) + Filter Operator [FIL_218] (rows=304 width=94) predicate:(d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') Please refer to the previous TableScan [TS_3] <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_238] + SHUFFLE [RS_243] 
PartitionCols:_col0 - Select Operator [SEL_237] (rows=53632139 width=19) + Select Operator [SEL_242] (rows=53632139 width=19) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_236] (rows=53632139 width=19) + Filter Operator [FIL_241] (rows=53632139 width=19) predicate:(sr_customer_sk is not null and sr_returned_date_sk is not null) TableScan [TS_12] (rows=57591150 width=19) default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number","sr_return_quantity"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_36] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_204] (rows=285117831 width=11) - Conds:RS_230._col0=RS_217._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_209] (rows=285117831 width=11) + Conds:RS_235._col0=RS_222._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_217] + PARTITION_ONLY_SHUFFLE [RS_222] PartitionCols:_col0 - Select Operator [SEL_214] (rows=304 width=4) + Select Operator [SEL_219] (rows=304 width=4) Output:["_col0"] - Filter Operator [FIL_211] (rows=304 width=94) + Filter Operator [FIL_216] (rows=304 width=94) predicate:(d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') Please refer to the previous TableScan [TS_3] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_230] + SHUFFLE [RS_235] PartitionCols:_col0 - Select Operator [SEL_229] (rows=285117831 width=15) + Select Operator [SEL_234] (rows=285117831 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_228] (rows=285117831 width=15) + Filter Operator [FIL_233] (rows=285117831 width=15) predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_34_d3_d_date_sk_min) AND DynamicValue(RS_34_d3_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_34_d3_d_date_sk_bloom_filter))) TableScan [TS_0] (rows=287989836 width=15) 
default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_quantity"] <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_227] - Group By Operator [GBY_226] (rows=1 width=12) + BROADCAST [RS_232] + Group By Operator [GBY_231] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_224] - Group By Operator [GBY_222] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_229] + Group By Operator [GBY_227] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_218] (rows=304 width=4) + Select Operator [SEL_223] (rows=304 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_214] + Please refer to the previous Select Operator [SEL_219] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query25.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query25.q.out index adabb76e04..144c5d2cdd 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query25.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query25.q.out @@ -128,140 +128,140 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_246] - Limit [LIM_245] (rows=100 width=808) + File Output Operator [FS_251] + Limit [LIM_250] (rows=100 width=808) Number of rows:100 - Select Operator [SEL_244] (rows=97302218301 width=808) + Select Operator [SEL_249] (rows=97302218301 width=808) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_243] - Group By Operator [GBY_242] (rows=97302218301 width=808) + SHUFFLE [RS_248] + Group By Operator [GBY_247] (rows=97302218301 width=808) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_47] PartitionCols:_col0, _col1, _col2, _col3 Group By Operator [GBY_46] (rows=97302218301 width=808) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col10)","sum(_col16)","sum(_col3)"],keys:_col22, _col23, _col19, _col20 - Top N Key Operator [TNK_93] (rows=97302218301 width=807) + Top N Key Operator [TNK_96] (rows=97302218301 width=807) keys:_col22, _col23, _col19, _col20,top n:100 - Merge Join Operator [MERGEJOIN_209] (rows=97302218301 width=807) - Conds:RS_42._col6=RS_241._col0(Inner),Output:["_col3","_col10","_col16","_col19","_col20","_col22","_col23"] + Merge Join Operator [MERGEJOIN_214] (rows=97302218301 width=807) + Conds:RS_42._col6=RS_246._col0(Inner),Output:["_col3","_col10","_col16","_col19","_col20","_col22","_col23"] <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_241] + SHUFFLE [RS_246] PartitionCols:_col0 - Select Operator [SEL_240] (rows=462000 width=288) + Select Operator [SEL_245] (rows=462000 width=288) Output:["_col0","_col1","_col2"] TableScan [TS_31] (rows=462000 width=288) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_item_desc"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_42] PartitionCols:_col6 - Merge Join Operator [MERGEJOIN_208] (rows=97302218301 width=527) - Conds:RS_39._col8=RS_239._col0(Inner),Output:["_col3","_col6","_col10","_col16","_col19","_col20"] + Merge Join Operator [MERGEJOIN_213] (rows=97302218301 width=527) + Conds:RS_39._col8=RS_244._col0(Inner),Output:["_col3","_col6","_col10","_col16","_col19","_col20"] <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_239] + SHUFFLE [RS_244] PartitionCols:_col0 - Select Operator [SEL_238] (rows=1704 width=192) + Select Operator [SEL_243] (rows=1704 width=192) Output:["_col0","_col1","_col2"] TableScan 
[TS_29] (rows=1704 width=192) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id","s_store_name"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_39] PartitionCols:_col8 - Merge Join Operator [MERGEJOIN_207] (rows=97302218301 width=343) + Merge Join Operator [MERGEJOIN_212] (rows=97302218301 width=343) Conds:RS_36._col1, _col2=RS_37._col9, _col8(Inner),Output:["_col3","_col6","_col8","_col10","_col16"] <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_37] PartitionCols:_col9, _col8 - Merge Join Operator [MERGEJOIN_206] (rows=478292911 width=234) + Merge Join Operator [MERGEJOIN_211] (rows=478292911 width=234) Conds:RS_25._col2, _col1, _col4=RS_26._col2, _col1, _col3(Inner),Output:["_col1","_col3","_col5","_col8","_col9","_col11"] <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_25] PartitionCols:_col2, _col1, _col4 - Merge Join Operator [MERGEJOIN_204] (rows=501694138 width=122) - Conds:RS_234._col0=RS_218._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_209] (rows=501694138 width=122) + Conds:RS_239._col0=RS_223._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_218] + PARTITION_ONLY_SHUFFLE [RS_223] PartitionCols:_col0 - Select Operator [SEL_214] (rows=50 width=4) + Select Operator [SEL_219] (rows=50 width=4) Output:["_col0"] - Filter Operator [FIL_211] (rows=50 width=12) + Filter Operator [FIL_216] (rows=50 width=12) predicate:((d_year = 2000) and (d_moy = 4)) TableScan [TS_3] (rows=73049 width=12) default@date_dim,d3,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_234] + SHUFFLE [RS_239] PartitionCols:_col0 - Select Operator [SEL_233] (rows=501694138 width=126) + Select Operator [SEL_238] (rows=501694138 width=126) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_232] (rows=501694138 width=126) + Filter Operator [FIL_237] (rows=501694138 width=126) 
predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_23_d1_d_date_sk_min) AND DynamicValue(RS_23_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_23_d1_d_date_sk_bloom_filter))) TableScan [TS_6] (rows=575995635 width=126) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_net_profit"] <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_231] - Group By Operator [GBY_230] (rows=1 width=12) + BROADCAST [RS_236] + Group By Operator [GBY_235] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_224] - Group By Operator [GBY_222] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_229] + Group By Operator [GBY_227] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_219] (rows=50 width=4) + Select Operator [SEL_224] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_214] + Please refer to the previous Select Operator [SEL_219] <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_26] PartitionCols:_col2, _col1, _col3 - Merge Join Operator [MERGEJOIN_205] (rows=53632139 width=119) - Conds:RS_237._col0=RS_220._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_210] (rows=53632139 width=119) + Conds:RS_242._col0=RS_225._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_220] + PARTITION_ONLY_SHUFFLE [RS_225] PartitionCols:_col0 - Select Operator [SEL_215] (rows=351 width=4) + Select Operator [SEL_220] (rows=351 width=4) Output:["_col0"] - Filter Operator [FIL_212] 
(rows=351 width=12) + Filter Operator [FIL_217] (rows=351 width=12) predicate:((d_year = 2000) and d_moy BETWEEN 4 AND 10) Please refer to the previous TableScan [TS_3] <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_237] + SHUFFLE [RS_242] PartitionCols:_col0 - Select Operator [SEL_236] (rows=53632139 width=123) + Select Operator [SEL_241] (rows=53632139 width=123) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_235] (rows=53632139 width=123) + Filter Operator [FIL_240] (rows=53632139 width=123) predicate:(sr_customer_sk is not null and sr_returned_date_sk is not null) TableScan [TS_12] (rows=57591150 width=123) default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number","sr_net_loss"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_36] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_203] (rows=285117831 width=119) - Conds:RS_229._col0=RS_216._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_208] (rows=285117831 width=119) + Conds:RS_234._col0=RS_221._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_216] + PARTITION_ONLY_SHUFFLE [RS_221] PartitionCols:_col0 - Select Operator [SEL_213] (rows=351 width=4) + Select Operator [SEL_218] (rows=351 width=4) Output:["_col0"] - Filter Operator [FIL_210] (rows=351 width=12) + Filter Operator [FIL_215] (rows=351 width=12) predicate:((d_year = 2000) and d_moy BETWEEN 4 AND 10) Please refer to the previous TableScan [TS_3] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_229] + SHUFFLE [RS_234] PartitionCols:_col0 - Select Operator [SEL_228] (rows=285117831 width=123) + Select Operator [SEL_233] (rows=285117831 width=123) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_227] (rows=285117831 width=123) + Filter Operator [FIL_232] (rows=285117831 width=123) predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not 
null and cs_sold_date_sk BETWEEN DynamicValue(RS_34_d3_d_date_sk_min) AND DynamicValue(RS_34_d3_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_34_d3_d_date_sk_bloom_filter))) TableScan [TS_0] (rows=287989836 width=123) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_net_profit"] <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_226] - Group By Operator [GBY_225] (rows=1 width=12) + BROADCAST [RS_231] + Group By Operator [GBY_230] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_223] - Group By Operator [GBY_221] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_228] + Group By Operator [GBY_226] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_217] (rows=351 width=4) + Select Operator [SEL_222] (rows=351 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_213] + Please refer to the previous Select Operator [SEL_218] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query26.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query26.q.out index 824bbe6769..44f7ce328f 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query26.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query26.q.out @@ -67,93 +67,93 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_123] - Limit [LIM_122] (rows=100 width=444) + File Output Operator [FS_128] + Limit [LIM_127] (rows=100 width=444) Number of rows:100 - Select Operator [SEL_121] (rows=310774 width=444) + Select Operator [SEL_126] (rows=310774 width=444) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 6 [SIMPLE_EDGE] vectorized 
- SHUFFLE [RS_120] - Select Operator [SEL_119] (rows=310774 width=444) + SHUFFLE [RS_125] + Select Operator [SEL_124] (rows=310774 width=444) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_118] (rows=310774 width=476) + Group By Operator [GBY_123] (rows=310774 width=476) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)","sum(VALUE._col6)","count(VALUE._col7)"],keys:KEY._col0 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_28] PartitionCols:_col0 Group By Operator [GBY_27] (rows=462000 width=476) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(_col4)","count(_col4)","sum(_col5)","count(_col5)","sum(_col7)","count(_col7)","sum(_col6)","count(_col6)"],keys:_col12 - Top N Key Operator [TNK_54] (rows=2317924 width=231) + Top N Key Operator [TNK_57] (rows=2317924 width=231) keys:_col12,top n:100 - Merge Join Operator [MERGEJOIN_98] (rows=2317924 width=231) - Conds:RS_23._col2=RS_117._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col12"] + Merge Join Operator [MERGEJOIN_103] (rows=2317924 width=231) + Conds:RS_23._col2=RS_122._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col12"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_117] + SHUFFLE [RS_122] PartitionCols:_col0 - Select Operator [SEL_116] (rows=462000 width=104) + Select Operator [SEL_121] (rows=462000 width=104) Output:["_col0","_col1"] TableScan [TS_12] (rows=462000 width=104) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_23] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_97] (rows=2317924 width=135) - Conds:RS_20._col3=RS_115._col0(Inner),Output:["_col2","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_102] (rows=2317924 width=135) + 
Conds:RS_20._col3=RS_120._col0(Inner),Output:["_col2","_col4","_col5","_col6","_col7"] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_115] + SHUFFLE [RS_120] PartitionCols:_col0 - Select Operator [SEL_114] (rows=2300 width=4) + Select Operator [SEL_119] (rows=2300 width=4) Output:["_col0"] - Filter Operator [FIL_113] (rows=2300 width=174) + Filter Operator [FIL_118] (rows=2300 width=174) predicate:((p_channel_email = 'N') or (p_channel_event = 'N')) TableScan [TS_9] (rows=2300 width=174) default@promotion,promotion,Tbl:COMPLETE,Col:COMPLETE,Output:["p_promo_sk","p_channel_email","p_channel_event"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_20] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_96] (rows=2317924 width=137) - Conds:RS_17._col0=RS_112._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_101] (rows=2317924 width=137) + Conds:RS_17._col0=RS_117._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6","_col7"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_112] + SHUFFLE [RS_117] PartitionCols:_col0 - Select Operator [SEL_111] (rows=652 width=4) + Select Operator [SEL_116] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_110] (rows=652 width=8) + Filter Operator [FIL_115] (rows=652 width=8) predicate:(d_year = 1998) TableScan [TS_6] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_17] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_95] (rows=2317924 width=139) - Conds:RS_109._col1=RS_101._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_100] (rows=2317924 width=139) + Conds:RS_114._col1=RS_106._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7"] <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_101] + PARTITION_ONLY_SHUFFLE [RS_106] PartitionCols:_col0 - Select Operator [SEL_100] (rows=14776 width=4) 
+ Select Operator [SEL_105] (rows=14776 width=4) Output:["_col0"] - Filter Operator [FIL_99] (rows=14776 width=268) + Filter Operator [FIL_104] (rows=14776 width=268) predicate:((cd_marital_status = 'W') and (cd_education_status = 'Primary') and (cd_gender = 'F')) TableScan [TS_3] (rows=1861800 width=268) default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_109] + SHUFFLE [RS_114] PartitionCols:_col1 - Select Operator [SEL_108] (rows=283691050 width=354) + Select Operator [SEL_113] (rows=283691050 width=354) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_107] (rows=283691050 width=354) + Filter Operator [FIL_112] (rows=283691050 width=354) predicate:(cs_promo_sk is not null and cs_sold_date_sk is not null and cs_bill_cdemo_sk is not null and cs_bill_cdemo_sk BETWEEN DynamicValue(RS_15_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_15_customer_demographics_cd_demo_sk_max) and in_bloom_filter(cs_bill_cdemo_sk, DynamicValue(RS_15_customer_demographics_cd_demo_sk_bloom_filter))) TableScan [TS_0] (rows=287989836 width=354) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_cdemo_sk","cs_item_sk","cs_promo_sk","cs_quantity","cs_list_price","cs_sales_price","cs_coupon_amt"] <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_106] - Group By Operator [GBY_105] (rows=1 width=12) + BROADCAST [RS_111] + Group By Operator [GBY_110] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_104] - Group By Operator [GBY_103] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_109] + Group By Operator [GBY_108] (rows=1 width=12) 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_102] (rows=14776 width=4) + Select Operator [SEL_107] (rows=14776 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_100] + Please refer to the previous Select Operator [SEL_105] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query27.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query27.q.out index abbd02d6c9..e1a48eaeea 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query27.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query27.q.out @@ -71,95 +71,95 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_124] - Limit [LIM_123] (rows=100 width=538) + File Output Operator [FS_126] + Limit [LIM_125] (rows=100 width=538) Number of rows:100 - Select Operator [SEL_122] (rows=6526254 width=538) + Select Operator [SEL_124] (rows=6526254 width=538) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_121] - Select Operator [SEL_120] (rows=6526254 width=538) + SHUFFLE [RS_123] + Select Operator [SEL_122] (rows=6526254 width=538) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Group By Operator [GBY_119] (rows=6526254 width=570) + Group By Operator [GBY_121] (rows=6526254 width=570) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)","sum(VALUE._col6)","count(VALUE._col7)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_29] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_28] (rows=13907934 width=570) - 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(_col2)","count(_col2)","sum(_col3)","count(_col3)","sum(_col4)","count(_col4)","sum(_col5)","count(_col5)"],keys:_col0, _col1, 0L - Top N Key Operator [TNK_55] (rows=4635978 width=186) - keys:_col0, _col1, 0L,top n:100 + Top N Key Operator [TNK_57] (rows=13907934 width=570) + keys:_col0, _col1,top n:100 + Group By Operator [GBY_28] (rows=13907934 width=570) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(_col2)","count(_col2)","sum(_col3)","count(_col3)","sum(_col4)","count(_col4)","sum(_col5)","count(_col5)"],keys:_col0, _col1, 0L Select Operator [SEL_26] (rows=4635978 width=186) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_99] (rows=4635978 width=186) - Conds:RS_23._col1=RS_118._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col11","_col13"] + Merge Join Operator [MERGEJOIN_101] (rows=4635978 width=186) + Conds:RS_23._col1=RS_120._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col11","_col13"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_118] + SHUFFLE [RS_120] PartitionCols:_col0 - Select Operator [SEL_117] (rows=462000 width=104) + Select Operator [SEL_119] (rows=462000 width=104) Output:["_col0","_col1"] TableScan [TS_12] (rows=462000 width=104) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_23] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_98] (rows=4635978 width=90) - Conds:RS_20._col3=RS_116._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col11"] + Merge Join Operator [MERGEJOIN_100] (rows=4635978 width=90) + Conds:RS_20._col3=RS_118._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col11"] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_116] + SHUFFLE [RS_118] PartitionCols:_col0 - Select Operator [SEL_115] 
(rows=209 width=90) + Select Operator [SEL_117] (rows=209 width=90) Output:["_col0","_col1"] - Filter Operator [FIL_114] (rows=209 width=90) + Filter Operator [FIL_116] (rows=209 width=90) predicate:(s_state) IN ('SD', 'FL', 'MI', 'LA', 'MO', 'SC') TableScan [TS_9] (rows=1704 width=90) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_state"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_20] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_97] (rows=4635978 width=4) - Conds:RS_17._col0=RS_113._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_99] (rows=4635978 width=4) + Conds:RS_17._col0=RS_115._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_113] + SHUFFLE [RS_115] PartitionCols:_col0 - Select Operator [SEL_112] (rows=652 width=4) + Select Operator [SEL_114] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_111] (rows=652 width=8) + Filter Operator [FIL_113] (rows=652 width=8) predicate:(d_year = 2001) TableScan [TS_6] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_17] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_96] (rows=4635978 width=4) - Conds:RS_110._col2=RS_102._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_98] (rows=4635978 width=4) + Conds:RS_112._col2=RS_104._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7"] <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_102] + PARTITION_ONLY_SHUFFLE [RS_104] PartitionCols:_col0 - Select Operator [SEL_101] (rows=14776 width=4) + Select Operator [SEL_103] (rows=14776 width=4) Output:["_col0"] - Filter Operator [FIL_100] (rows=14776 width=268) + Filter Operator [FIL_102] (rows=14776 width=268) predicate:((cd_marital_status = 'U') and (cd_education_status = '2 yr Degree') and 
(cd_gender = 'M')) TableScan [TS_3] (rows=1861800 width=268) default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_110] + SHUFFLE [RS_112] PartitionCols:_col2 - Select Operator [SEL_109] (rows=501690006 width=340) + Select Operator [SEL_111] (rows=501690006 width=340) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_108] (rows=501690006 width=340) + Filter Operator [FIL_110] (rows=501690006 width=340) predicate:(ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_cdemo_sk BETWEEN DynamicValue(RS_15_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_15_customer_demographics_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_15_customer_demographics_cd_demo_sk_bloom_filter))) TableScan [TS_0] (rows=575995635 width=340) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_cdemo_sk","ss_store_sk","ss_quantity","ss_list_price","ss_sales_price","ss_coupon_amt"] <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_107] - Group By Operator [GBY_106] (rows=1 width=12) + BROADCAST [RS_109] + Group By Operator [GBY_108] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_105] - Group By Operator [GBY_104] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_107] + Group By Operator [GBY_106] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_103] (rows=14776 width=4) + Select Operator [SEL_105] (rows=14776 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_101] + Please refer 
to the previous Select Operator [SEL_103] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query29.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query29.q.out index c308771dfb..2895200eb1 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query29.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query29.q.out @@ -126,141 +126,141 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_246] - Limit [LIM_245] (rows=100 width=496) + File Output Operator [FS_251] + Limit [LIM_250] (rows=100 width=496) Number of rows:100 - Select Operator [SEL_244] (rows=97302218301 width=496) + Select Operator [SEL_249] (rows=97302218301 width=496) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_243] - Group By Operator [GBY_242] (rows=97302218301 width=496) + SHUFFLE [RS_248] + Group By Operator [GBY_247] (rows=97302218301 width=496) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_47] PartitionCols:_col0, _col1, _col2, _col3 Group By Operator [GBY_46] (rows=97302218301 width=496) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col10)","sum(_col16)","sum(_col3)"],keys:_col22, _col23, _col19, _col20 - Top N Key Operator [TNK_93] (rows=97302218301 width=483) + Top N Key Operator [TNK_96] (rows=97302218301 width=483) keys:_col22, _col23, _col19, _col20,top n:100 - Merge Join Operator [MERGEJOIN_209] (rows=97302218301 width=483) - Conds:RS_42._col6=RS_241._col0(Inner),Output:["_col3","_col10","_col16","_col19","_col20","_col22","_col23"] + Merge Join Operator [MERGEJOIN_214] (rows=97302218301 width=483) + Conds:RS_42._col6=RS_246._col0(Inner),Output:["_col3","_col10","_col16","_col19","_col20","_col22","_col23"] <-Map 18 
[SIMPLE_EDGE] vectorized - SHUFFLE [RS_241] + SHUFFLE [RS_246] PartitionCols:_col0 - Select Operator [SEL_240] (rows=462000 width=288) + Select Operator [SEL_245] (rows=462000 width=288) Output:["_col0","_col1","_col2"] TableScan [TS_31] (rows=462000 width=288) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_item_desc"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_42] PartitionCols:_col6 - Merge Join Operator [MERGEJOIN_208] (rows=97302218301 width=203) - Conds:RS_39._col8=RS_239._col0(Inner),Output:["_col3","_col6","_col10","_col16","_col19","_col20"] + Merge Join Operator [MERGEJOIN_213] (rows=97302218301 width=203) + Conds:RS_39._col8=RS_244._col0(Inner),Output:["_col3","_col6","_col10","_col16","_col19","_col20"] <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_239] + SHUFFLE [RS_244] PartitionCols:_col0 - Select Operator [SEL_238] (rows=1704 width=192) + Select Operator [SEL_243] (rows=1704 width=192) Output:["_col0","_col1","_col2"] TableScan [TS_29] (rows=1704 width=192) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id","s_store_name"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_39] PartitionCols:_col8 - Merge Join Operator [MERGEJOIN_207] (rows=97302218301 width=19) + Merge Join Operator [MERGEJOIN_212] (rows=97302218301 width=19) Conds:RS_36._col1, _col2=RS_37._col9, _col8(Inner),Output:["_col3","_col6","_col8","_col10","_col16"] <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_37] PartitionCols:_col9, _col8 - Merge Join Operator [MERGEJOIN_206] (rows=478292911 width=23) + Merge Join Operator [MERGEJOIN_211] (rows=478292911 width=23) Conds:RS_25._col2, _col1, _col4=RS_26._col2, _col1, _col3(Inner),Output:["_col1","_col3","_col5","_col8","_col9","_col11"] <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_25] PartitionCols:_col2, _col1, _col4 - Merge Join Operator [MERGEJOIN_204] (rows=501694138 width=19) - Conds:RS_234._col0=RS_225._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_209] 
(rows=501694138 width=19) + Conds:RS_239._col0=RS_230._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_225] + SHUFFLE [RS_230] PartitionCols:_col0 - Select Operator [SEL_223] (rows=50 width=4) + Select Operator [SEL_228] (rows=50 width=4) Output:["_col0"] - Filter Operator [FIL_221] (rows=50 width=12) + Filter Operator [FIL_226] (rows=50 width=12) predicate:((d_year = 1999) and (d_moy = 4)) TableScan [TS_9] (rows=73049 width=12) default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_234] + SHUFFLE [RS_239] PartitionCols:_col0 - Select Operator [SEL_233] (rows=501694138 width=23) + Select Operator [SEL_238] (rows=501694138 width=23) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_232] (rows=501694138 width=23) + Filter Operator [FIL_237] (rows=501694138 width=23) predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_23_d1_d_date_sk_min) AND DynamicValue(RS_23_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_23_d1_d_date_sk_bloom_filter))) TableScan [TS_6] (rows=575995635 width=23) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_quantity"] <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_231] - Group By Operator [GBY_230] (rows=1 width=12) + BROADCAST [RS_236] + Group By Operator [GBY_235] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_229] - Group By Operator [GBY_228] (rows=1 width=12) + SHUFFLE [RS_234] + Group By Operator [GBY_233] (rows=1 width=12) 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_226] (rows=50 width=4) + Select Operator [SEL_231] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_223] + Please refer to the previous Select Operator [SEL_228] <-Reducer 15 [SIMPLE_EDGE] SHUFFLE [RS_26] PartitionCols:_col2, _col1, _col3 - Merge Join Operator [MERGEJOIN_205] (rows=53632139 width=15) - Conds:RS_237._col0=RS_227._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_210] (rows=53632139 width=15) + Conds:RS_242._col0=RS_232._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_227] + SHUFFLE [RS_232] PartitionCols:_col0 - Select Operator [SEL_224] (rows=201 width=4) + Select Operator [SEL_229] (rows=201 width=4) Output:["_col0"] - Filter Operator [FIL_222] (rows=201 width=12) + Filter Operator [FIL_227] (rows=201 width=12) predicate:((d_year = 1999) and d_moy BETWEEN 4 AND 7) Please refer to the previous TableScan [TS_9] <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_237] + SHUFFLE [RS_242] PartitionCols:_col0 - Select Operator [SEL_236] (rows=53632139 width=19) + Select Operator [SEL_241] (rows=53632139 width=19) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_235] (rows=53632139 width=19) + Filter Operator [FIL_240] (rows=53632139 width=19) predicate:(sr_customer_sk is not null and sr_returned_date_sk is not null) TableScan [TS_12] (rows=57591150 width=19) default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number","sr_return_quantity"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_36] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_203] (rows=285117831 width=11) - Conds:RS_220._col0=RS_212._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_208] 
(rows=285117831 width=11) + Conds:RS_225._col0=RS_217._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_212] + PARTITION_ONLY_SHUFFLE [RS_217] PartitionCols:_col0 - Select Operator [SEL_211] (rows=1957 width=4) + Select Operator [SEL_216] (rows=1957 width=4) Output:["_col0"] - Filter Operator [FIL_210] (rows=1957 width=8) + Filter Operator [FIL_215] (rows=1957 width=8) predicate:(d_year) IN (1999, 2000, 2001) TableScan [TS_3] (rows=73049 width=8) default@date_dim,d3,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_220] + SHUFFLE [RS_225] PartitionCols:_col0 - Select Operator [SEL_219] (rows=285117831 width=15) + Select Operator [SEL_224] (rows=285117831 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_218] (rows=285117831 width=15) + Filter Operator [FIL_223] (rows=285117831 width=15) predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_34_d3_d_date_sk_min) AND DynamicValue(RS_34_d3_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_34_d3_d_date_sk_bloom_filter))) TableScan [TS_0] (rows=287989836 width=15) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_quantity"] <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_217] - Group By Operator [GBY_216] (rows=1 width=12) + BROADCAST [RS_222] + Group By Operator [GBY_221] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_215] - Group By Operator [GBY_214] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_220] + Group By Operator [GBY_219] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, 
expectedEntries=1000000)"] - Select Operator [SEL_213] (rows=1957 width=4) + Select Operator [SEL_218] (rows=1957 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_211] + Please refer to the previous Select Operator [SEL_216] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query35.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query35.q.out index 23b3399123..ea0f0b5057 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query35.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query35.q.out @@ -153,82 +153,82 @@ Stage-0 limit:-1 Stage-1 Reducer 8 vectorized - File Output Operator [FS_226] - Limit [LIM_225] (rows=1 width=352) + File Output Operator [FS_231] + Limit [LIM_230] (rows=1 width=352) Number of rows:100 - Select Operator [SEL_224] (rows=1 width=352) + Select Operator [SEL_229] (rows=1 width=352) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16"] <-Reducer 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_223] - Select Operator [SEL_222] (rows=1 width=352) + SHUFFLE [RS_228] + Select Operator [SEL_227] (rows=1 width=352) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col12","_col14","_col15","_col16","_col17"] - Group By Operator [GBY_221] (rows=1 width=336) + Group By Operator [GBY_226] (rows=1 width=336) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","count(VALUE._col2)","max(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)","max(VALUE._col6)","sum(VALUE._col7)","count(VALUE._col8)","max(VALUE._col9)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_66] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 Group 
By Operator [GBY_65] (rows=2 width=336) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"],aggregations:["count()","sum(_col8)","count(_col8)","max(_col8)","sum(_col9)","count(_col9)","max(_col9)","sum(_col10)","count(_col10)","max(_col10)"],keys:_col4, _col6, _col7, _col8, _col9, _col10 - Top N Key Operator [TNK_103] (rows=1401496 width=276) - keys:_col4, _col6, _col7, _col8, _col9, _col10,top n:100 - Select Operator [SEL_64] (rows=1401496 width=276) - Output:["_col4","_col6","_col7","_col8","_col9","_col10"] + Select Operator [SEL_64] (rows=1401496 width=276) + Output:["_col4","_col6","_col7","_col8","_col9","_col10"] + Top N Key Operator [TNK_106] (rows=1401496 width=276) + keys:_col4, _col6, _col7, _col8, _col9, _col10,top n:100 Filter Operator [FIL_63] (rows=1401496 width=276) predicate:(_col11 is not null or _col13 is not null) - Merge Join Operator [MERGEJOIN_181] (rows=1401496 width=276) - Conds:RS_60._col0=RS_220._col1(Left Outer),Output:["_col4","_col6","_col7","_col8","_col9","_col10","_col11","_col13"] + Merge Join Operator [MERGEJOIN_186] (rows=1401496 width=276) + Conds:RS_60._col0=RS_225._col1(Left Outer),Output:["_col4","_col6","_col7","_col8","_col9","_col10","_col11","_col13"] <-Reducer 5 [SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_60] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_180] (rows=1414922 width=276) - Conds:RS_57._col0=RS_212._col1(Left Outer),Output:["_col0","_col4","_col6","_col7","_col8","_col9","_col10","_col11"] + Merge Join Operator [MERGEJOIN_185] (rows=1414922 width=276) + Conds:RS_57._col0=RS_217._col1(Left Outer),Output:["_col0","_col4","_col6","_col7","_col8","_col9","_col10","_col11"] <-Reducer 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_212] + SHUFFLE [RS_217] PartitionCols:_col1 - Select Operator [SEL_211] (rows=1414922 width=7) + Select Operator [SEL_216] (rows=1414922 width=7) Output:["_col0","_col1"] - Group By Operator 
[GBY_210] (rows=1414922 width=3) + Group By Operator [GBY_215] (rows=1414922 width=3) Output:["_col0"],keys:KEY._col0 <-Reducer 16 [SIMPLE_EDGE] SHUFFLE [RS_35] PartitionCols:_col0 Group By Operator [GBY_34] (rows=143930993 width=3) Output:["_col0"],keys:_col1 - Merge Join Operator [MERGEJOIN_177] (rows=143930993 width=3) - Conds:RS_209._col0=RS_193._col0(Inner),Output:["_col1"] + Merge Join Operator [MERGEJOIN_182] (rows=143930993 width=3) + Conds:RS_214._col0=RS_198._col0(Inner),Output:["_col1"] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_193] + SHUFFLE [RS_198] PartitionCols:_col0 - Select Operator [SEL_190] (rows=652 width=4) + Select Operator [SEL_195] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_189] (rows=652 width=12) + Filter Operator [FIL_194] (rows=652 width=12) predicate:((d_year = 1999) and (d_qoy < 4)) TableScan [TS_17] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_209] + SHUFFLE [RS_214] PartitionCols:_col0 - Select Operator [SEL_208] (rows=143930993 width=7) + Select Operator [SEL_213] (rows=143930993 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_207] (rows=143930993 width=7) + Filter Operator [FIL_212] (rows=143930993 width=7) predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_31_date_dim_d_date_sk_min) AND DynamicValue(RS_31_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_31_date_dim_d_date_sk_bloom_filter))) TableScan [TS_24] (rows=144002668 width=7) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] <-Reducer 18 [BROADCAST_EDGE] vectorized - BROADCAST [RS_206] - Group By Operator [GBY_205] (rows=1 width=12) + BROADCAST [RS_211] + Group By Operator [GBY_210] (rows=1 width=12) 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_199] - Group By Operator [GBY_197] (rows=1 width=12) + SHUFFLE [RS_204] + Group By Operator [GBY_202] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_194] (rows=652 width=4) + Select Operator [SEL_199] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_190] + Please refer to the previous Select Operator [SEL_195] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_57] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_179] (rows=525327388 width=272) + Merge Join Operator [MERGEJOIN_184] (rows=525327388 width=272) Conds:RS_54._col0=RS_55._col0(Left Semi),Output:["_col0","_col4","_col6","_col7","_col8","_col9","_col10"] <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_55] @@ -237,103 +237,103 @@ Stage-0 Output:["_col0"],keys:_col0 Select Operator [SEL_23] (rows=525327388 width=3) Output:["_col0"] - Merge Join Operator [MERGEJOIN_176] (rows=525327388 width=3) - Conds:RS_204._col0=RS_191._col0(Inner),Output:["_col1"] + Merge Join Operator [MERGEJOIN_181] (rows=525327388 width=3) + Conds:RS_209._col0=RS_196._col0(Inner),Output:["_col1"] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_191] + SHUFFLE [RS_196] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_190] + Please refer to the previous Select Operator [SEL_195] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_204] + SHUFFLE [RS_209] PartitionCols:_col0 - Select Operator [SEL_203] (rows=525327388 width=7) + Select Operator [SEL_208] (rows=525327388 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_202] (rows=525327388 width=7) + Filter Operator [FIL_207] (rows=525327388 width=7) predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk 
BETWEEN DynamicValue(RS_21_date_dim_d_date_sk_min) AND DynamicValue(RS_21_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_21_date_dim_d_date_sk_bloom_filter))) TableScan [TS_14] (rows=575995635 width=7) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk"] <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_201] - Group By Operator [GBY_200] (rows=1 width=12) + BROADCAST [RS_206] + Group By Operator [GBY_205] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_198] - Group By Operator [GBY_196] (rows=1 width=12) + SHUFFLE [RS_203] + Group By Operator [GBY_201] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_192] (rows=652 width=4) + Select Operator [SEL_197] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_190] + Please refer to the previous Select Operator [SEL_195] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_54] PartitionCols:_col0 Select Operator [SEL_13] (rows=78293105 width=272) Output:["_col0","_col4","_col6","_col7","_col8","_col9","_col10"] - Merge Join Operator [MERGEJOIN_175] (rows=78293105 width=272) - Conds:RS_10._col2=RS_188._col0(Inner),Output:["_col0","_col4","_col5","_col6","_col7","_col8","_col10"] + Merge Join Operator [MERGEJOIN_180] (rows=78293105 width=272) + Conds:RS_10._col2=RS_193._col0(Inner),Output:["_col0","_col4","_col5","_col6","_col7","_col8","_col10"] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_188] + SHUFFLE [RS_193] PartitionCols:_col0 - Select Operator [SEL_187] (rows=40000000 width=90) + Select Operator [SEL_192] (rows=40000000 width=90) Output:["_col0","_col1"] TableScan [TS_5] (rows=40000000 width=90) 
default@customer_address,ca,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_10] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_174] (rows=78293105 width=190) - Conds:RS_184._col1=RS_186._col0(Inner),Output:["_col0","_col2","_col4","_col5","_col6","_col7","_col8"] + Merge Join Operator [MERGEJOIN_179] (rows=78293105 width=190) + Conds:RS_189._col1=RS_191._col0(Inner),Output:["_col0","_col2","_col4","_col5","_col6","_col7","_col8"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_184] + SHUFFLE [RS_189] PartitionCols:_col1 - Select Operator [SEL_183] (rows=77201384 width=11) + Select Operator [SEL_188] (rows=77201384 width=11) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_182] (rows=77201384 width=11) + Filter Operator [FIL_187] (rows=77201384 width=11) predicate:(c_current_cdemo_sk is not null and c_current_addr_sk is not null) TableScan [TS_0] (rows=80000000 width=11) default@customer,c,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_186] + SHUFFLE [RS_191] PartitionCols:_col0 - Select Operator [SEL_185] (rows=1861800 width=186) + Select Operator [SEL_190] (rows=1861800 width=186) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] TableScan [TS_3] (rows=1861800 width=186) default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_dep_count","cd_dep_employed_count","cd_dep_college_count"] <-Reducer 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_220] + SHUFFLE [RS_225] PartitionCols:_col1 - Select Operator [SEL_219] (rows=1401496 width=7) + Select Operator [SEL_224] (rows=1401496 width=7) Output:["_col0","_col1"] - Group By Operator [GBY_218] (rows=1401496 width=3) + Group By Operator [GBY_223] (rows=1401496 width=3) Output:["_col0"],keys:KEY._col0 <-Reducer 19 [SIMPLE_EDGE] SHUFFLE [RS_49] PartitionCols:_col0 Group By Operator [GBY_48] 
(rows=285115246 width=3) Output:["_col0"],keys:_col1 - Merge Join Operator [MERGEJOIN_178] (rows=285115246 width=3) - Conds:RS_217._col0=RS_195._col0(Inner),Output:["_col1"] + Merge Join Operator [MERGEJOIN_183] (rows=285115246 width=3) + Conds:RS_222._col0=RS_200._col0(Inner),Output:["_col1"] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_195] + SHUFFLE [RS_200] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_190] + Please refer to the previous Select Operator [SEL_195] <-Map 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_217] + SHUFFLE [RS_222] PartitionCols:_col0 - Select Operator [SEL_216] (rows=285115246 width=7) + Select Operator [SEL_221] (rows=285115246 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_215] (rows=285115246 width=7) + Filter Operator [FIL_220] (rows=285115246 width=7) predicate:(cs_ship_customer_sk is not null and cs_sold_date_sk is not null and cs_ship_customer_sk BETWEEN DynamicValue(RS_60_c_c_customer_sk_min) AND DynamicValue(RS_60_c_c_customer_sk_max) and in_bloom_filter(cs_ship_customer_sk, DynamicValue(RS_60_c_c_customer_sk_bloom_filter))) TableScan [TS_38] (rows=287989836 width=7) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_ship_customer_sk"] <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_214] - Group By Operator [GBY_213] (rows=1 width=12) + BROADCAST [RS_219] + Group By Operator [GBY_218] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Reducer 5 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_166] - Group By Operator [GBY_165] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_171] + Group By Operator [GBY_170] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_164] (rows=1414922 width=4) + Select Operator [SEL_169] (rows=1414922 
width=4) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_180] + Please refer to the previous Merge Join Operator [MERGEJOIN_185] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query37.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query37.q.out index 187ad5c5b5..0f16fa1ad1 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query37.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query37.q.out @@ -56,78 +56,78 @@ Stage-0 limit:100 Stage-1 Reducer 5 vectorized - File Output Operator [FS_100] - Limit [LIM_99] (rows=4 width=396) + File Output Operator [FS_105] + Limit [LIM_104] (rows=4 width=396) Number of rows:100 - Select Operator [SEL_98] (rows=4 width=396) + Select Operator [SEL_103] (rows=4 width=396) Output:["_col0","_col1","_col2"] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_97] - Group By Operator [GBY_96] (rows=4 width=396) + SHUFFLE [RS_102] + Group By Operator [GBY_101] (rows=4 width=396) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_23] PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_22] (rows=8 width=396) Output:["_col0","_col1","_col2"],keys:_col2, _col3, _col4 - Top N Key Operator [TNK_42] (rows=11627 width=396) + Top N Key Operator [TNK_47] (rows=11627 width=396) keys:_col2, _col3, _col4,top n:100 - Merge Join Operator [MERGEJOIN_78] (rows=11627 width=396) + Merge Join Operator [MERGEJOIN_83] (rows=11627 width=396) Conds:RS_18._col1=RS_19._col1(Inner),Output:["_col2","_col3","_col4"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_76] (rows=1781971 width=400) - Conds:RS_89._col0=RS_81._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_81] (rows=1781971 width=400) + Conds:RS_94._col0=RS_86._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 6 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE 
[RS_81] + PARTITION_ONLY_SHUFFLE [RS_86] PartitionCols:_col0 - Select Operator [SEL_80] (rows=297 width=400) + Select Operator [SEL_85] (rows=297 width=400) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_79] (rows=297 width=404) + Filter Operator [FIL_84] (rows=297 width=404) predicate:(i_current_price BETWEEN 22 AND 52 and (i_manufact_id) IN (678, 964, 918, 849)) TableScan [TS_2] (rows=462000 width=403) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_item_desc","i_current_price","i_manufact_id"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_89] + SHUFFLE [RS_94] PartitionCols:_col0 - Select Operator [SEL_88] (rows=287989836 width=4) + Select Operator [SEL_93] (rows=287989836 width=4) Output:["_col0"] - Filter Operator [FIL_87] (rows=287989836 width=4) + Filter Operator [FIL_92] (rows=287989836 width=4) predicate:(cs_item_sk BETWEEN DynamicValue(RS_16_item_i_item_sk_min) AND DynamicValue(RS_16_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_16_item_i_item_sk_bloom_filter))) TableScan [TS_0] (rows=287989836 width=4) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_item_sk"] <-Reducer 7 [BROADCAST_EDGE] vectorized - BROADCAST [RS_86] - Group By Operator [GBY_85] (rows=1 width=12) + BROADCAST [RS_91] + Group By Operator [GBY_90] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_84] - Group By Operator [GBY_83] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_89] + Group By Operator [GBY_88] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_82] (rows=297 width=4) + Select Operator [SEL_87] (rows=297 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_80] + Please refer 
to the previous Select Operator [SEL_85] <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_19] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_77] (rows=1879072 width=4) - Conds:RS_92._col0=RS_95._col0(Inner),Output:["_col1"] + Merge Join Operator [MERGEJOIN_82] (rows=1879072 width=4) + Conds:RS_97._col0=RS_100._col0(Inner),Output:["_col1"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_95] + SHUFFLE [RS_100] PartitionCols:_col0 - Select Operator [SEL_94] (rows=8116 width=4) + Select Operator [SEL_99] (rows=8116 width=4) Output:["_col0"] - Filter Operator [FIL_93] (rows=8116 width=98) + Filter Operator [FIL_98] (rows=8116 width=98) predicate:CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-06-02 00:00:00' AND TIMESTAMP'2001-08-01 00:00:00' TableScan [TS_8] (rows=73049 width=98) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_92] + SHUFFLE [RS_97] PartitionCols:_col0 - Select Operator [SEL_91] (rows=16912800 width=8) + Select Operator [SEL_96] (rows=16912800 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_90] (rows=16912800 width=11) + Filter Operator [FIL_95] (rows=16912800 width=11) predicate:inv_quantity_on_hand BETWEEN 100 AND 500 TableScan [TS_5] (rows=37584000 width=11) default@inventory,inventory,Tbl:COMPLETE,Col:COMPLETE,Output:["inv_date_sk","inv_item_sk","inv_quantity_on_hand"] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query40.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query40.q.out index 070b5cb1f5..fc1f190d02 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query40.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query40.q.out @@ -81,90 +81,90 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_122] - Limit [LIM_121] (rows=100 width=410) + File Output Operator [FS_127] + Limit [LIM_126] (rows=100 width=410) Number of rows:100 - Select Operator [SEL_120] (rows=769995 
width=410) + Select Operator [SEL_125] (rows=769995 width=410) Output:["_col0","_col1","_col2","_col3"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_119] - Group By Operator [GBY_118] (rows=769995 width=410) + SHUFFLE [RS_124] + Group By Operator [GBY_123] (rows=769995 width=410) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_28] PartitionCols:_col0, _col1 Group By Operator [GBY_27] (rows=51819042 width=410) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col2)","sum(_col3)"],keys:_col0, _col1 - Top N Key Operator [TNK_53] (rows=51819042 width=302) - keys:_col0, _col1,top n:100 - Select Operator [SEL_25] (rows=51819042 width=302) - Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_99] (rows=51819042 width=302) - Conds:RS_22._col1=RS_117._col0(Inner),Output:["_col4","_col7","_col9","_col10","_col12","_col14"] + Select Operator [SEL_25] (rows=51819042 width=302) + Output:["_col0","_col1","_col2","_col3"] + Top N Key Operator [TNK_56] (rows=51819042 width=302) + keys:_col14, _col12,top n:100 + Merge Join Operator [MERGEJOIN_104] (rows=51819042 width=302) + Conds:RS_22._col1=RS_122._col0(Inner),Output:["_col4","_col7","_col9","_col10","_col12","_col14"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_117] + SHUFFLE [RS_122] PartitionCols:_col0 - Select Operator [SEL_116] (rows=27 width=90) + Select Operator [SEL_121] (rows=27 width=90) Output:["_col0","_col1"] TableScan [TS_11] (rows=27 width=90) default@warehouse,warehouse,Tbl:COMPLETE,Col:COMPLETE,Output:["w_warehouse_sk","w_state"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_22] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_98] (rows=51819042 width=220) - Conds:RS_19._col2=RS_102._col0(Inner),Output:["_col1","_col4","_col7","_col9","_col10","_col12"] + Merge Join Operator [MERGEJOIN_103] (rows=51819042 width=220) + 
Conds:RS_19._col2=RS_107._col0(Inner),Output:["_col1","_col4","_col7","_col9","_col10","_col12"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_102] + SHUFFLE [RS_107] PartitionCols:_col0 - Select Operator [SEL_101] (rows=51333 width=104) + Select Operator [SEL_106] (rows=51333 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_100] (rows=51333 width=215) + Filter Operator [FIL_105] (rows=51333 width=215) predicate:i_current_price BETWEEN 0.99 AND 1.49 TableScan [TS_8] (rows=462000 width=215) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_current_price"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_19] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_97] (rows=466374405 width=171) - Conds:RS_16._col0=RS_115._col0(Inner),Output:["_col1","_col2","_col4","_col7","_col9","_col10"] + Merge Join Operator [MERGEJOIN_102] (rows=466374405 width=171) + Conds:RS_16._col0=RS_120._col0(Inner),Output:["_col1","_col2","_col4","_col7","_col9","_col10"] <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_115] + SHUFFLE [RS_120] PartitionCols:_col0 - Select Operator [SEL_114] (rows=8116 width=12) + Select Operator [SEL_119] (rows=8116 width=12) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_113] (rows=8116 width=98) + Filter Operator [FIL_118] (rows=8116 width=98) predicate:CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00' TableScan [TS_5] (rows=73049 width=98) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_16] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_96] (rows=466374405 width=167) - Conds:RS_110._col2, _col3=RS_112._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col4","_col7"] + Merge Join Operator [MERGEJOIN_101] (rows=466374405 width=167) + Conds:RS_115._col2, _col3=RS_117._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col4","_col7"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE 
[RS_110] + SHUFFLE [RS_115] PartitionCols:_col2, _col3 - Select Operator [SEL_109] (rows=285115816 width=127) + Select Operator [SEL_114] (rows=285115816 width=127) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_108] (rows=285115816 width=127) + Filter Operator [FIL_113] (rows=285115816 width=127) predicate:(cs_warehouse_sk is not null and cs_sold_date_sk is not null and cs_item_sk BETWEEN DynamicValue(RS_20_item_i_item_sk_min) AND DynamicValue(RS_20_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_20_item_i_item_sk_bloom_filter))) TableScan [TS_0] (rows=287989836 width=127) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_warehouse_sk","cs_item_sk","cs_order_number","cs_sales_price"] <-Reducer 11 [BROADCAST_EDGE] vectorized - BROADCAST [RS_107] - Group By Operator [GBY_106] (rows=1 width=12) + BROADCAST [RS_112] + Group By Operator [GBY_111] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_105] - Group By Operator [GBY_104] (rows=1 width=12) + SHUFFLE [RS_110] + Group By Operator [GBY_109] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_103] (rows=51333 width=4) + Select Operator [SEL_108] (rows=51333 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_101] + Please refer to the previous Select Operator [SEL_106] <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_112] + SHUFFLE [RS_117] PartitionCols:_col0, _col1 - Select Operator [SEL_111] (rows=28798881 width=117) + Select Operator [SEL_116] (rows=28798881 width=117) Output:["_col0","_col1","_col2"] TableScan [TS_3] (rows=28798881 width=117) 
default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number","cr_refunded_cash"] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query43.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query43.q.out index b5a6c746d1..487e2948b4 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query43.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query43.q.out @@ -57,67 +57,69 @@ Stage-0 limit:100 Stage-1 Reducer 5 vectorized - File Output Operator [FS_74] - Limit [LIM_73] (rows=100 width=972) + File Output Operator [FS_80] + Limit [LIM_79] (rows=100 width=972) Number of rows:100 - Select Operator [SEL_72] (rows=3751 width=972) + Select Operator [SEL_78] (rows=3751 width=972) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_71] - Group By Operator [GBY_70] (rows=3751 width=972) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)"],keys:KEY._col0, KEY._col1 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_18] - PartitionCols:_col0, _col1 - Group By Operator [GBY_17] (rows=2486913 width=972) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)","sum(_col5)","sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col0, _col1 - Top N Key Operator [TNK_33] (rows=525329897 width=322) - keys:_col0, _col1,top n:100 + SHUFFLE [RS_77] + Top N Key Operator [TNK_76] (rows=3751 width=972) + keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8,top n:100 + Group By Operator [GBY_75] (rows=3751 width=972) + 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)"],keys:KEY._col0, KEY._col1 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col0, _col1 + Group By Operator [GBY_17] (rows=2486913 width=972) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)","sum(_col5)","sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col0, _col1 Select Operator [SEL_15] (rows=525329897 width=322) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Merge Join Operator [MERGEJOIN_55] (rows=525329897 width=322) - Conds:RS_12._col1=RS_69._col0(Inner),Output:["_col2","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col12","_col13"] - <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_69] - PartitionCols:_col0 - Select Operator [SEL_68] (rows=341 width=192) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_67] (rows=341 width=303) - predicate:(s_gmt_offset = -6) - TableScan [TS_6] (rows=1704 width=303) - default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id","s_store_name","s_gmt_offset"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_12] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_54] (rows=525329897 width=138) - Conds:RS_66._col0=RS_58._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] - <-Map 6 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_58] - PartitionCols:_col0 - Select Operator [SEL_57] (rows=652 width=32) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_56] (rows=652 width=99) - predicate:(d_year = 1998) - TableScan [TS_3] (rows=73049 width=99) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_day_name"] - <-Map 1 
[SIMPLE_EDGE] vectorized - SHUFFLE [RS_66] - PartitionCols:_col0 - Select Operator [SEL_65] (rows=525329897 width=114) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_64] (rows=525329897 width=114) - predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=575995635 width=114) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_sales_price"] - <-Reducer 7 [BROADCAST_EDGE] vectorized - BROADCAST [RS_63] - Group By Operator [GBY_62] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_61] - Group By Operator [GBY_60] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_59] (rows=652 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_57] + Top N Key Operator [TNK_36] (rows=525329897 width=322) + keys:_col13, _col12,top n:100 + Merge Join Operator [MERGEJOIN_60] (rows=525329897 width=322) + Conds:RS_12._col1=RS_74._col0(Inner),Output:["_col2","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col12","_col13"] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_74] + PartitionCols:_col0 + Select Operator [SEL_73] (rows=341 width=192) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_72] (rows=341 width=303) + predicate:(s_gmt_offset = -6) + TableScan [TS_6] (rows=1704 width=303) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id","s_store_name","s_gmt_offset"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_12] + 
PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_59] (rows=525329897 width=138) + Conds:RS_71._col0=RS_63._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] + <-Map 6 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_63] + PartitionCols:_col0 + Select Operator [SEL_62] (rows=652 width=32) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Filter Operator [FIL_61] (rows=652 width=99) + predicate:(d_year = 1998) + TableScan [TS_3] (rows=73049 width=99) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_day_name"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_71] + PartitionCols:_col0 + Select Operator [SEL_70] (rows=525329897 width=114) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_69] (rows=525329897 width=114) + predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_0] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_sales_price"] + <-Reducer 7 [BROADCAST_EDGE] vectorized + BROADCAST [RS_68] + Group By Operator [GBY_67] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_66] + Group By Operator [GBY_65] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_64] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_62] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query45.q.out 
ql/src/test/results/clientpositive/perf/tez/constraints/query45.q.out index 3f5dbf4beb..15953f98c0 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query45.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query45.q.out @@ -67,116 +67,116 @@ Stage-0 limit:100 Stage-1 Reducer 5 vectorized - File Output Operator [FS_149] - Limit [LIM_148] (rows=100 width=299) + File Output Operator [FS_154] + Limit [LIM_153] (rows=100 width=299) Number of rows:100 - Select Operator [SEL_147] (rows=17401956 width=299) + Select Operator [SEL_152] (rows=17401956 width=299) Output:["_col0","_col1","_col2"] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_146] - Group By Operator [GBY_145] (rows=17401956 width=299) + SHUFFLE [RS_151] + Group By Operator [GBY_150] (rows=17401956 width=299) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_41] PartitionCols:_col0, _col1 Group By Operator [GBY_40] (rows=143930993 width=299) Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col8, _col7 - Top N Key Operator [TNK_69] (rows=143930993 width=302) - keys:_col8, _col7,top n:100 - Select Operator [SEL_39] (rows=143930993 width=302) - Output:["_col3","_col7","_col8"] + Select Operator [SEL_39] (rows=143930993 width=302) + Output:["_col3","_col7","_col8"] + Top N Key Operator [TNK_72] (rows=143930993 width=302) + keys:_col8, _col7,top n:100 Filter Operator [FIL_38] (rows=143930993 width=302) predicate:(_col15 is not null or (substr(_col8, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792')) Select Operator [SEL_37] (rows=143930993 width=302) Output:["_col3","_col7","_col8","_col15"] - Merge Join Operator [MERGEJOIN_119] (rows=143930993 width=302) + Merge Join Operator [MERGEJOIN_124] (rows=143930993 width=302) Conds:RS_34._col0=RS_35._col6(Inner),Output:["_col3","_col7","_col8","_col12"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_34] 
PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_115] (rows=462007 width=4) - Conds:RS_122._col1=RS_128._col0(Left Outer),Output:["_col0","_col3"] + Merge Join Operator [MERGEJOIN_120] (rows=462007 width=4) + Conds:RS_127._col1=RS_133._col0(Left Outer),Output:["_col0","_col3"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_122] + SHUFFLE [RS_127] PartitionCols:_col1 - Select Operator [SEL_120] (rows=462000 width=104) + Select Operator [SEL_125] (rows=462000 width=104) Output:["_col0","_col1"] TableScan [TS_0] (rows=462000 width=104) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_128] + SHUFFLE [RS_133] PartitionCols:_col0 - Select Operator [SEL_127] (rows=5 width=104) + Select Operator [SEL_132] (rows=5 width=104) Output:["_col0","_col1"] - Group By Operator [GBY_126] (rows=5 width=100) + Group By Operator [GBY_131] (rows=5 width=100) Output:["_col0"],keys:KEY._col0 <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_125] + SHUFFLE [RS_130] PartitionCols:_col0 - Group By Operator [GBY_124] (rows=5 width=100) + Group By Operator [GBY_129] (rows=5 width=100) Output:["_col0"],keys:i_item_id - Select Operator [SEL_123] (rows=11 width=104) + Select Operator [SEL_128] (rows=11 width=104) Output:["i_item_id"] - Filter Operator [FIL_121] (rows=11 width=104) + Filter Operator [FIL_126] (rows=11 width=104) predicate:(i_item_sk) IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) Please refer to the previous TableScan [TS_0] <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_35] PartitionCols:_col6 - Merge Join Operator [MERGEJOIN_118] (rows=143930993 width=302) + Merge Join Operator [MERGEJOIN_123] (rows=143930993 width=302) Conds:RS_27._col0=RS_28._col2(Inner),Output:["_col3","_col4","_col6","_col8"] <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_28] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_117] (rows=143930993 width=119) - Conds:RS_144._col0=RS_136._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator 
[MERGEJOIN_122] (rows=143930993 width=119) + Conds:RS_149._col0=RS_141._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 13 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_136] + PARTITION_ONLY_SHUFFLE [RS_141] PartitionCols:_col0 - Select Operator [SEL_135] (rows=130 width=12) + Select Operator [SEL_140] (rows=130 width=12) Output:["_col0"] - Filter Operator [FIL_134] (rows=130 width=12) + Filter Operator [FIL_139] (rows=130 width=12) predicate:((d_year = 2000) and (d_qoy = 2)) TableScan [TS_17] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_144] + SHUFFLE [RS_149] PartitionCols:_col0 - Select Operator [SEL_143] (rows=143930993 width=123) + Select Operator [SEL_148] (rows=143930993 width=123) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_142] (rows=143930993 width=123) + Filter Operator [FIL_147] (rows=143930993 width=123) predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_21_date_dim_d_date_sk_min) AND DynamicValue(RS_21_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_21_date_dim_d_date_sk_bloom_filter))) TableScan [TS_14] (rows=144002668 width=123) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk","ws_sales_price"] <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_141] - Group By Operator [GBY_140] (rows=1 width=12) + BROADCAST [RS_146] + Group By Operator [GBY_145] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_139] - Group By Operator [GBY_138] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_144] + Group By Operator [GBY_143] (rows=1 width=12) 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_137] (rows=130 width=4) + Select Operator [SEL_142] (rows=130 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_135] + Please refer to the previous Select Operator [SEL_140] <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_27] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_116] (rows=80000000 width=191) - Conds:RS_131._col1=RS_133._col0(Inner),Output:["_col0","_col3","_col4"] + Merge Join Operator [MERGEJOIN_121] (rows=80000000 width=191) + Conds:RS_136._col1=RS_138._col0(Inner),Output:["_col0","_col3","_col4"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_133] + SHUFFLE [RS_138] PartitionCols:_col0 - Select Operator [SEL_132] (rows=40000000 width=191) + Select Operator [SEL_137] (rows=40000000 width=191) Output:["_col0","_col1","_col2"] TableScan [TS_12] (rows=40000000 width=191) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county","ca_zip"] <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_131] + SHUFFLE [RS_136] PartitionCols:_col1 - Select Operator [SEL_130] (rows=80000000 width=8) + Select Operator [SEL_135] (rows=80000000 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_129] (rows=80000000 width=8) + Filter Operator [FIL_134] (rows=80000000 width=8) predicate:c_current_addr_sk is not null TableScan [TS_9] (rows=80000000 width=8) default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk"] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query49.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query49.q.out index b384aea779..37fc51697b 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query49.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query49.q.out @@ -299,46 +299,46 @@ Stage-0 limit:100 Stage-1 Reducer 11 vectorized - File Output 
Operator [FS_310] - Limit [LIM_309] (rows=100 width=215) + File Output Operator [FS_315] + Limit [LIM_314] (rows=100 width=215) Number of rows:100 - Select Operator [SEL_308] (rows=40436 width=215) + Select Operator [SEL_313] (rows=40436 width=215) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_307] - Select Operator [SEL_306] (rows=40436 width=215) + SHUFFLE [RS_312] + Select Operator [SEL_311] (rows=40436 width=215) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_305] (rows=40436 width=215) + Group By Operator [GBY_310] (rows=40436 width=215) Output:["_col0","_col1","_col2","_col3","_col4"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 <-Union 9 [SIMPLE_EDGE] <-Reducer 24 [CONTAINS] vectorized - Reduce Output Operator [RS_351] + Reduce Output Operator [RS_356] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_350] (rows=40436 width=215) + Group By Operator [GBY_355] (rows=40436 width=215) Output:["_col0","_col1","_col2","_col3","_col4"],keys:_col0, _col3, _col4, _col1, _col2 - Top N Key Operator [TNK_349] (rows=40436 width=214) + Top N Key Operator [TNK_354] (rows=40436 width=214) keys:_col0, _col3, _col4, _col1, _col2,top n:100 - Select Operator [SEL_348] (rows=14232 width=213) + Select Operator [SEL_353] (rows=14232 width=213) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_347] (rows=14232 width=248) + Filter Operator [FIL_352] (rows=14232 width=248) predicate:((_col0 <= 10) or (rank_window_1 <= 10)) - PTF Operator [PTF_346] (rows=21349 width=248) + PTF Operator [PTF_351] (rows=21349 width=248) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col4 AS decimal(15,4)) / CAST( _col5 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_345] (rows=21349 width=248) + Select Operator [SEL_350] (rows=21349 width=248) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] 
<-Reducer 23 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_344] + SHUFFLE [RS_349] PartitionCols:0 - Select Operator [SEL_343] (rows=21349 width=244) + Select Operator [SEL_348] (rows=21349 width=244) Output:["rank_window_0","_col0","_col1","_col2","_col3","_col4"] - PTF Operator [PTF_342] (rows=21349 width=244) + PTF Operator [PTF_347] (rows=21349 width=244) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col1 AS decimal(15,4)) / CAST( _col2 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_341] (rows=21349 width=244) + Select Operator [SEL_346] (rows=21349 width=244) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_340] + SHUFFLE [RS_345] PartitionCols:0 - Group By Operator [GBY_339] (rows=21349 width=244) + Group By Operator [GBY_344] (rows=21349 width=244) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0 <-Reducer 21 [SIMPLE_EDGE] SHUFFLE [RS_89] @@ -347,89 +347,89 @@ Stage-0 Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col1)","sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0 Select Operator [SEL_86] (rows=20856667 width=216) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_237] (rows=20856667 width=216) - Conds:RS_83._col1, _col2=RS_338._col0, _col1(Inner),Output:["_col1","_col3","_col4","_col9","_col10"] + Merge Join Operator [MERGEJOIN_242] (rows=20856667 width=216) + Conds:RS_83._col1, _col2=RS_343._col0, _col1(Inner),Output:["_col1","_col3","_col4","_col9","_col10"] <-Map 30 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_338] + SHUFFLE [RS_343] PartitionCols:_col0, _col1 - Select Operator [SEL_337] (rows=19197050 width=119) + Select Operator [SEL_342] (rows=19197050 width=119) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_336] (rows=19197050 width=119) + Filter Operator [FIL_341] 
(rows=19197050 width=119) predicate:(sr_return_amt > 10000) TableScan [TS_77] (rows=57591150 width=119) default@store_returns,sr,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_ticket_number","sr_return_quantity","sr_return_amt"] <-Reducer 20 [SIMPLE_EDGE] SHUFFLE [RS_83] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_236] (rows=61119617 width=118) - Conds:RS_335._col0=RS_272._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_241] (rows=61119617 width=118) + Conds:RS_340._col0=RS_277._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 12 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_272] + PARTITION_ONLY_SHUFFLE [RS_277] PartitionCols:_col0 - Select Operator [SEL_267] (rows=50 width=4) + Select Operator [SEL_272] (rows=50 width=4) Output:["_col0"] - Filter Operator [FIL_266] (rows=50 width=12) + Filter Operator [FIL_271] (rows=50 width=12) predicate:((d_year = 2000) and (d_moy = 12)) TableScan [TS_3] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] <-Map 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_335] + SHUFFLE [RS_340] PartitionCols:_col0 - Select Operator [SEL_334] (rows=61119617 width=229) + Select Operator [SEL_339] (rows=61119617 width=229) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_333] (rows=61119617 width=229) + Filter Operator [FIL_338] (rows=61119617 width=229) predicate:((ss_net_profit > 1) and (ss_net_paid > 0) and (ss_quantity > 0) and ss_sold_date_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_81_date_dim_d_date_sk_min) AND DynamicValue(RS_81_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_81_date_dim_d_date_sk_bloom_filter))) TableScan [TS_71] (rows=575995635 width=229) default@store_sales,sts,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_ticket_number","ss_quantity","ss_net_paid","ss_net_profit"] <-Reducer 25 [BROADCAST_EDGE] 
vectorized - BROADCAST [RS_332] - Group By Operator [GBY_331] (rows=1 width=12) + BROADCAST [RS_337] + Group By Operator [GBY_336] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_279] - Group By Operator [GBY_276] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_284] + Group By Operator [GBY_281] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_273] (rows=50 width=4) + Select Operator [SEL_278] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_267] + Please refer to the previous Select Operator [SEL_272] <-Reducer 8 [CONTAINS] vectorized - Reduce Output Operator [RS_304] + Reduce Output Operator [RS_309] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_303] (rows=40436 width=215) + Group By Operator [GBY_308] (rows=40436 width=215) Output:["_col0","_col1","_col2","_col3","_col4"],keys:_col0, _col3, _col4, _col1, _col2 - Top N Key Operator [TNK_302] (rows=40436 width=214) + Top N Key Operator [TNK_307] (rows=40436 width=214) keys:_col0, _col3, _col4, _col1, _col2,top n:100 - Select Operator [SEL_301] (rows=26204 width=215) + Select Operator [SEL_306] (rows=26204 width=215) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_300] (rows=26204 width=215) + Group By Operator [GBY_305] (rows=26204 width=215) Output:["_col0","_col1","_col2","_col3","_col4"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 <-Union 7 [SIMPLE_EDGE] <-Reducer 18 [CONTAINS] vectorized - Reduce Output Operator [RS_330] + Reduce Output Operator [RS_335] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_329] (rows=26204 width=215) + Group By Operator [GBY_334] (rows=26204 width=215) 
Output:["_col0","_col1","_col2","_col3","_col4"],keys:_col0, _col3, _col4, _col1, _col2 - Select Operator [SEL_328] (rows=12574 width=215) + Select Operator [SEL_333] (rows=12574 width=215) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_327] (rows=12574 width=248) + Filter Operator [FIL_332] (rows=12574 width=248) predicate:((_col0 <= 10) or (rank_window_1 <= 10)) - PTF Operator [PTF_326] (rows=18863 width=248) + PTF Operator [PTF_331] (rows=18863 width=248) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col4 AS decimal(15,4)) / CAST( _col5 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_325] (rows=18863 width=248) + Select Operator [SEL_330] (rows=18863 width=248) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_324] + SHUFFLE [RS_329] PartitionCols:0 - Select Operator [SEL_323] (rows=18863 width=244) + Select Operator [SEL_328] (rows=18863 width=244) Output:["rank_window_0","_col0","_col1","_col2","_col3","_col4"] - PTF Operator [PTF_322] (rows=18863 width=244) + PTF Operator [PTF_327] (rows=18863 width=244) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col1 AS decimal(15,4)) / CAST( _col2 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_321] (rows=18863 width=244) + Select Operator [SEL_326] (rows=18863 width=244) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_320] + SHUFFLE [RS_325] PartitionCols:0 - Group By Operator [GBY_319] (rows=18863 width=244) + Group By Operator [GBY_324] (rows=18863 width=244) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0 <-Reducer 15 [SIMPLE_EDGE] SHUFFLE [RS_50] @@ -438,72 +438,72 @@ Stage-0 
Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col1)","sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0 Select Operator [SEL_47] (rows=9599627 width=231) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_235] (rows=9599627 width=231) - Conds:RS_44._col1, _col2=RS_318._col0, _col1(Inner),Output:["_col1","_col3","_col4","_col9","_col10"] + Merge Join Operator [MERGEJOIN_240] (rows=9599627 width=231) + Conds:RS_44._col1, _col2=RS_323._col0, _col1(Inner),Output:["_col1","_col3","_col4","_col9","_col10"] <-Map 28 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_318] + SHUFFLE [RS_323] PartitionCols:_col0, _col1 - Select Operator [SEL_317] (rows=9599627 width=121) + Select Operator [SEL_322] (rows=9599627 width=121) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_316] (rows=9599627 width=121) + Filter Operator [FIL_321] (rows=9599627 width=121) predicate:(cr_return_amount > 10000) TableScan [TS_38] (rows=28798881 width=121) default@catalog_returns,cr,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number","cr_return_quantity","cr_return_amount"] <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_44] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_234] (rows=31838858 width=123) - Conds:RS_315._col0=RS_270._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_239] (rows=31838858 width=123) + Conds:RS_320._col0=RS_275._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 12 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_270] + PARTITION_ONLY_SHUFFLE [RS_275] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_267] + Please refer to the previous Select Operator [SEL_272] <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_315] + SHUFFLE [RS_320] PartitionCols:_col0 - Select Operator [SEL_314] (rows=31838858 width=239) + Select Operator [SEL_319] (rows=31838858 width=239) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator 
[FIL_313] (rows=31838858 width=239) + Filter Operator [FIL_318] (rows=31838858 width=239) predicate:((cs_net_profit > 1) and (cs_net_paid > 0) and (cs_quantity > 0) and cs_sold_date_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_42_date_dim_d_date_sk_min) AND DynamicValue(RS_42_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_42_date_dim_d_date_sk_bloom_filter))) TableScan [TS_32] (rows=287989836 width=239) default@catalog_sales,cs,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk","cs_order_number","cs_quantity","cs_net_paid","cs_net_profit"] <-Reducer 19 [BROADCAST_EDGE] vectorized - BROADCAST [RS_312] - Group By Operator [GBY_311] (rows=1 width=12) + BROADCAST [RS_317] + Group By Operator [GBY_316] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_278] - Group By Operator [GBY_275] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_283] + Group By Operator [GBY_280] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_271] (rows=50 width=4) + Select Operator [SEL_276] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_267] + Please refer to the previous Select Operator [SEL_272] <-Reducer 6 [CONTAINS] vectorized - Reduce Output Operator [RS_299] + Reduce Output Operator [RS_304] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_298] (rows=26204 width=215) + Group By Operator [GBY_303] (rows=26204 width=215) Output:["_col0","_col1","_col2","_col3","_col4"],keys:_col0, _col3, _col4, _col1, _col2 - Select Operator [SEL_297] (rows=13630 width=211) + Select Operator [SEL_302] (rows=13630 width=211) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_296] 
(rows=13630 width=248) + Filter Operator [FIL_301] (rows=13630 width=248) predicate:((_col0 <= 10) or (rank_window_1 <= 10)) - PTF Operator [PTF_295] (rows=20445 width=248) + PTF Operator [PTF_300] (rows=20445 width=248) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col4 AS decimal(15,4)) / CAST( _col5 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_294] (rows=20445 width=248) + Select Operator [SEL_299] (rows=20445 width=248) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_293] + SHUFFLE [RS_298] PartitionCols:0 - Select Operator [SEL_292] (rows=20445 width=244) + Select Operator [SEL_297] (rows=20445 width=244) Output:["rank_window_0","_col0","_col1","_col2","_col3","_col4"] - PTF Operator [PTF_291] (rows=20445 width=244) + PTF Operator [PTF_296] (rows=20445 width=244) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col1 AS decimal(15,4)) / CAST( _col2 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_290] (rows=20445 width=244) + Select Operator [SEL_295] (rows=20445 width=244) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_289] + SHUFFLE [RS_294] PartitionCols:0 - Group By Operator [GBY_288] (rows=20445 width=244) + Group By Operator [GBY_293] (rows=20445 width=244) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_18] @@ -512,44 +512,44 @@ Stage-0 Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col1)","sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0 Select Operator [SEL_15] (rows=5227456 width=231) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_233] (rows=5227456 width=231) - Conds:RS_12._col1, _col2=RS_287._col0, 
_col1(Inner),Output:["_col1","_col3","_col4","_col9","_col10"] + Merge Join Operator [MERGEJOIN_238] (rows=5227456 width=231) + Conds:RS_12._col1, _col2=RS_292._col0, _col1(Inner),Output:["_col1","_col3","_col4","_col9","_col10"] <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_287] + SHUFFLE [RS_292] PartitionCols:_col0, _col1 - Select Operator [SEL_286] (rows=4799489 width=118) + Select Operator [SEL_291] (rows=4799489 width=118) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_285] (rows=4799489 width=118) + Filter Operator [FIL_290] (rows=4799489 width=118) predicate:(wr_return_amt > 10000) TableScan [TS_6] (rows=14398467 width=118) default@web_returns,wr,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_item_sk","wr_order_number","wr_return_quantity","wr_return_amt"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_12] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_232] (rows=15996318 width=123) - Conds:RS_284._col0=RS_268._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_237] (rows=15996318 width=123) + Conds:RS_289._col0=RS_273._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 12 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_268] + PARTITION_ONLY_SHUFFLE [RS_273] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_267] + Please refer to the previous Select Operator [SEL_272] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_284] + SHUFFLE [RS_289] PartitionCols:_col0 - Select Operator [SEL_283] (rows=15996318 width=239) + Select Operator [SEL_288] (rows=15996318 width=239) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_282] (rows=15996318 width=239) + Filter Operator [FIL_287] (rows=15996318 width=239) predicate:((ws_net_profit > 1) and (ws_net_paid > 0) and (ws_quantity > 0) and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, 
DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) TableScan [TS_0] (rows=144002668 width=239) default@web_sales,ws,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_order_number","ws_quantity","ws_net_paid","ws_net_profit"] <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_281] - Group By Operator [GBY_280] (rows=1 width=12) + BROADCAST [RS_286] + Group By Operator [GBY_285] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_277] - Group By Operator [GBY_274] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_282] + Group By Operator [GBY_279] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_269] (rows=50 width=4) + Select Operator [SEL_274] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_267] + Please refer to the previous Select Operator [SEL_272] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query5.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query5.q.out index d3f79820f2..13288d28b4 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query5.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query5.q.out @@ -303,229 +303,229 @@ Stage-0 limit:100 Stage-1 Reducer 8 vectorized - File Output Operator [FS_300] - Limit [LIM_299] (rows=100 width=619) + File Output Operator [FS_302] + Limit [LIM_301] (rows=100 width=619) Number of rows:100 - Select Operator [SEL_298] (rows=59581 width=619) + Select Operator [SEL_300] (rows=59581 width=619) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_297] - Select Operator [SEL_296] (rows=59581 width=619) + SHUFFLE [RS_299] + Select Operator [SEL_298] (rows=59581 
width=619) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_295] (rows=59581 width=627) + Group By Operator [GBY_297] (rows=59581 width=627) Output:["_col0","_col1","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Union 6 [SIMPLE_EDGE] <-Reducer 14 [CONTAINS] vectorized - Reduce Output Operator [RS_310] + Reduce Output Operator [RS_312] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_309] (rows=59581 width=627) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Top N Key Operator [TNK_308] (rows=39721 width=618) - keys:_col0, _col1, 0L,top n:100 - Select Operator [SEL_307] (rows=38846 width=619) + Top N Key Operator [TNK_311] (rows=59581 width=627) + keys:_col0, _col1,top n:100 + Group By Operator [GBY_310] (rows=59581 width=627) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L + Select Operator [SEL_309] (rows=38846 width=619) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_306] (rows=38846 width=548) + Group By Operator [GBY_308] (rows=38846 width=548) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0 <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_45] PartitionCols:_col0 Group By Operator [GBY_44] (rows=26026820 width=548) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col2)","sum(_col4)","sum(_col3)","sum(_col5)"],keys:_col8 - Merge Join Operator [MERGEJOIN_219] (rows=313339499 width=546) - Conds:RS_40._col0=RS_305._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8"] + Merge Join Operator [MERGEJOIN_221] (rows=313339499 width=546) + Conds:RS_40._col0=RS_307._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8"] 
<-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_305] + SHUFFLE [RS_307] PartitionCols:_col0 - Select Operator [SEL_304] (rows=46000 width=104) + Select Operator [SEL_306] (rows=46000 width=104) Output:["_col0","_col1"] TableScan [TS_35] (rows=46000 width=104) default@catalog_page,catalog_page,Tbl:COMPLETE,Col:COMPLETE,Output:["cp_catalog_page_sk","cp_catalog_page_id"] <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_40] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_218] (rows=313339499 width=450) - Conds:Union 22._col1=RS_273._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_220] (rows=313339499 width=450) + Conds:Union 22._col1=RS_275._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_273] + SHUFFLE [RS_275] PartitionCols:_col0 - Select Operator [SEL_270] (rows=8116 width=4) + Select Operator [SEL_272] (rows=8116 width=4) Output:["_col0"] - Filter Operator [FIL_269] (rows=8116 width=98) + Filter Operator [FIL_271] (rows=8116 width=98) predicate:CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-08-04 00:00:00' AND TIMESTAMP'1998-08-18 00:00:00' TableScan [TS_8] (rows=73049 width=98) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] <-Union 22 [SIMPLE_EDGE] <-Map 21 [CONTAINS] vectorized - Reduce Output Operator [RS_322] + Reduce Output Operator [RS_324] PartitionCols:_col1 - Select Operator [SEL_321] (rows=285117694 width=455) + Select Operator [SEL_323] (rows=285117694 width=455) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_320] (rows=285117694 width=231) + Filter Operator [FIL_322] (rows=285117694 width=231) predicate:(cs_sold_date_sk is not null and cs_catalog_page_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_38_date_dim_d_date_sk_min) AND DynamicValue(RS_38_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_38_date_dim_d_date_sk_bloom_filter))) - TableScan 
[TS_250] (rows=287989836 width=231) + TableScan [TS_252] (rows=287989836 width=231) Output:["cs_sold_date_sk","cs_catalog_page_sk","cs_ext_sales_price","cs_net_profit"] <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_319] - Group By Operator [GBY_318] (rows=1 width=12) + BROADCAST [RS_321] + Group By Operator [GBY_320] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_281] - Group By Operator [GBY_278] (rows=1 width=12) + SHUFFLE [RS_283] + Group By Operator [GBY_280] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_274] (rows=8116 width=4) + Select Operator [SEL_276] (rows=8116 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_270] + Please refer to the previous Select Operator [SEL_272] <-Map 23 [CONTAINS] vectorized - Reduce Output Operator [RS_325] + Reduce Output Operator [RS_327] PartitionCols:_col1 - Select Operator [SEL_324] (rows=28221805 width=451) + Select Operator [SEL_326] (rows=28221805 width=451) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_323] (rows=28221805 width=227) + Filter Operator [FIL_325] (rows=28221805 width=227) predicate:(cr_catalog_page_sk is not null and cr_returned_date_sk is not null) - TableScan [TS_255] (rows=28798881 width=227) + TableScan [TS_257] (rows=28798881 width=227) Output:["cr_returned_date_sk","cr_catalog_page_sk","cr_return_amount","cr_net_loss"] <-Reducer 18 [CONTAINS] vectorized - Reduce Output Operator [RS_317] + Reduce Output Operator [RS_319] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_316] (rows=59581 width=627) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Top N Key 
Operator [TNK_315] (rows=39721 width=618) - keys:_col0, _col1, 0L,top n:100 - Select Operator [SEL_314] (rows=53 width=615) + Top N Key Operator [TNK_318] (rows=59581 width=627) + keys:_col0, _col1,top n:100 + Group By Operator [GBY_317] (rows=59581 width=627) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L + Select Operator [SEL_316] (rows=53 width=615) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_313] (rows=53 width=548) + Group By Operator [GBY_315] (rows=53 width=548) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0 <-Reducer 17 [SIMPLE_EDGE] SHUFFLE [RS_77] PartitionCols:_col0 Group By Operator [GBY_76] (rows=31641 width=548) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col2)","sum(_col4)","sum(_col3)","sum(_col5)"],keys:_col8 - Merge Join Operator [MERGEJOIN_221] (rows=278713608 width=547) - Conds:RS_72._col0=RS_312._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8"] + Merge Join Operator [MERGEJOIN_223] (rows=278713608 width=547) + Conds:RS_72._col0=RS_314._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8"] <-Map 30 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_312] + SHUFFLE [RS_314] PartitionCols:_col0 - Select Operator [SEL_311] (rows=84 width=104) + Select Operator [SEL_313] (rows=84 width=104) Output:["_col0","_col1"] TableScan [TS_67] (rows=84 width=104) default@web_site,web_site,Tbl:COMPLETE,Col:COMPLETE,Output:["web_site_sk","web_site_id"] <-Reducer 16 [SIMPLE_EDGE] SHUFFLE [RS_72] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_220] (rows=278713608 width=451) - Conds:Union 26._col1=RS_275._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_222] (rows=278713608 width=451) + Conds:Union 
26._col1=RS_277._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_275] + SHUFFLE [RS_277] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_270] + Please refer to the previous Select Operator [SEL_272] <-Union 26 [SIMPLE_EDGE] <-Map 25 [CONTAINS] vectorized - Reduce Output Operator [RS_330] + Reduce Output Operator [RS_332] PartitionCols:_col1 - Select Operator [SEL_329] (rows=143930874 width=455) + Select Operator [SEL_331] (rows=143930874 width=455) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_328] (rows=143930874 width=231) + Filter Operator [FIL_330] (rows=143930874 width=231) predicate:(ws_web_site_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_70_date_dim_d_date_sk_min) AND DynamicValue(RS_70_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_70_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_260] (rows=144002668 width=231) + TableScan [TS_262] (rows=144002668 width=231) Output:["ws_sold_date_sk","ws_web_site_sk","ws_ext_sales_price","ws_net_profit"] <-Reducer 19 [BROADCAST_EDGE] vectorized - BROADCAST [RS_327] - Group By Operator [GBY_326] (rows=1 width=12) + BROADCAST [RS_329] + Group By Operator [GBY_328] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_282] - Group By Operator [GBY_279] (rows=1 width=12) + SHUFFLE [RS_284] + Group By Operator [GBY_281] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_276] (rows=8116 width=4) + Select Operator [SEL_278] (rows=8116 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_270] + Please refer to the previous Select Operator 
[SEL_272] <-Reducer 28 [CONTAINS] - Reduce Output Operator [RS_268] + Reduce Output Operator [RS_270] PartitionCols:_col1 - Select Operator [SEL_266] (rows=134782734 width=454) + Select Operator [SEL_268] (rows=134782734 width=454) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_265] (rows=134782734 width=230) - Conds:RS_333._col0, _col2=RS_336._col1, _col2(Inner),Output:["_col1","_col3","_col6","_col7"] + Merge Join Operator [MERGEJOIN_267] (rows=134782734 width=230) + Conds:RS_335._col0, _col2=RS_338._col1, _col2(Inner),Output:["_col1","_col3","_col6","_col7"] <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_333] + SHUFFLE [RS_335] PartitionCols:_col0, _col2 - Select Operator [SEL_332] (rows=143966669 width=11) + Select Operator [SEL_334] (rows=143966669 width=11) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_331] (rows=143966669 width=11) + Filter Operator [FIL_333] (rows=143966669 width=11) predicate:ws_web_site_sk is not null TableScan [TS_52] (rows=144002668 width=11) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_item_sk","ws_web_site_sk","ws_order_number"] <-Map 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_336] + SHUFFLE [RS_338] PartitionCols:_col1, _col2 - Select Operator [SEL_335] (rows=13749816 width=225) + Select Operator [SEL_337] (rows=13749816 width=225) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_334] (rows=13749816 width=225) + Filter Operator [FIL_336] (rows=13749816 width=225) predicate:wr_returned_date_sk is not null TableScan [TS_55] (rows=14398467 width=225) default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_returned_date_sk","wr_item_sk","wr_order_number","wr_return_amt","wr_net_loss"] <-Reducer 5 [CONTAINS] vectorized - Reduce Output Operator [RS_294] + Reduce Output Operator [RS_296] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_293] (rows=59581 width=627) - 
Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Top N Key Operator [TNK_292] (rows=39721 width=618) - keys:_col0, _col1, 0L,top n:100 - Select Operator [SEL_291] (rows=822 width=617) + Top N Key Operator [TNK_295] (rows=59581 width=627) + keys:_col0, _col1,top n:100 + Group By Operator [GBY_294] (rows=59581 width=627) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L + Select Operator [SEL_293] (rows=822 width=617) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_290] (rows=822 width=548) + Group By Operator [GBY_292] (rows=822 width=548) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col0 Group By Operator [GBY_20] (rows=983934 width=548) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col2)","sum(_col4)","sum(_col3)","sum(_col5)"],keys:_col8 - Merge Join Operator [MERGEJOIN_217] (rows=578964757 width=528) - Conds:RS_16._col0=RS_289._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8"] + Merge Join Operator [MERGEJOIN_219] (rows=578964757 width=528) + Conds:RS_16._col0=RS_291._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8"] <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_289] + SHUFFLE [RS_291] PartitionCols:_col0 - Select Operator [SEL_288] (rows=1704 width=104) + Select Operator [SEL_290] (rows=1704 width=104) Output:["_col0","_col1"] TableScan [TS_11] (rows=1704 width=104) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_16] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_216] (rows=578964757 width=432) - Conds:Union 
2._col1=RS_271._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_218] (rows=578964757 width=432) + Conds:Union 2._col1=RS_273._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_271] + SHUFFLE [RS_273] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_270] + Please refer to the previous Select Operator [SEL_272] <-Union 2 [SIMPLE_EDGE] <-Map 1 [CONTAINS] vectorized - Reduce Output Operator [RS_287] + Reduce Output Operator [RS_289] PartitionCols:_col1 - Select Operator [SEL_286] (rows=525329897 width=445) + Select Operator [SEL_288] (rows=525329897 width=445) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_285] (rows=525329897 width=221) + Filter Operator [FIL_287] (rows=525329897 width=221) predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_14_date_dim_d_date_sk_min) AND DynamicValue(RS_14_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_14_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_222] (rows=575995635 width=221) + TableScan [TS_224] (rows=575995635 width=221) Output:["ss_sold_date_sk","ss_store_sk","ss_ext_sales_price","ss_net_profit"] <-Reducer 11 [BROADCAST_EDGE] vectorized - BROADCAST [RS_284] - Group By Operator [GBY_283] (rows=1 width=12) + BROADCAST [RS_286] + Group By Operator [GBY_285] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_280] - Group By Operator [GBY_277] (rows=1 width=12) + SHUFFLE [RS_282] + Group By Operator [GBY_279] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_272] (rows=8116 width=4) + Select Operator [SEL_274] 
(rows=8116 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_270] + Please refer to the previous Select Operator [SEL_272] <-Map 9 [CONTAINS] vectorized - Reduce Output Operator [RS_303] + Reduce Output Operator [RS_305] PartitionCols:_col1 - Select Operator [SEL_302] (rows=53634860 width=447) + Select Operator [SEL_304] (rows=53634860 width=447) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_301] (rows=53634860 width=223) + Filter Operator [FIL_303] (rows=53634860 width=223) predicate:(sr_store_sk is not null and sr_returned_date_sk is not null) - TableScan [TS_233] (rows=57591150 width=223) + TableScan [TS_235] (rows=57591150 width=223) Output:["sr_returned_date_sk","sr_store_sk","sr_return_amt","sr_net_loss"] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query50.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query50.q.out index 8c9754967f..db09f6b0cd 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query50.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query50.q.out @@ -138,67 +138,67 @@ Stage-0 limit:100 Stage-1 Reducer 6 vectorized - File Output Operator [FS_112] - Limit [LIM_111] (rows=100 width=858) + File Output Operator [FS_117] + Limit [LIM_116] (rows=100 width=858) Number of rows:100 - Select Operator [SEL_110] (rows=478292911 width=857) + Select Operator [SEL_115] (rows=478292911 width=857) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_109] - Group By Operator [GBY_108] (rows=478292911 width=857) + SHUFFLE [RS_114] + Group By Operator [GBY_113] (rows=478292911 width=857) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8, KEY._col9 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_23] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Group By Operator [GBY_22] (rows=478292911 width=857) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["sum(_col10)","sum(_col11)","sum(_col12)","sum(_col13)","sum(_col14)"],keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Top N Key Operator [TNK_43] (rows=478292911 width=825) - keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9,top n:100 - Select Operator [SEL_20] (rows=478292911 width=825) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] - Merge Join Operator [MERGEJOIN_96] (rows=478292911 width=825) - Conds:RS_17._col8=RS_107._col0(Inner),Output:["_col0","_col5","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20"] + Select Operator [SEL_20] (rows=478292911 width=825) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] + Top N Key Operator [TNK_46] (rows=478292911 width=825) + keys:_col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20,top n:100 + Merge Join Operator [MERGEJOIN_101] (rows=478292911 width=825) + Conds:RS_17._col8=RS_112._col0(Inner),Output:["_col0","_col5","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20"] <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_107] + 
SHUFFLE [RS_112] PartitionCols:_col0 - Select Operator [SEL_106] (rows=1704 width=821) + Select Operator [SEL_111] (rows=1704 width=821) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] TableScan [TS_9] (rows=1704 width=821) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_company_id","s_street_number","s_street_name","s_street_type","s_suite_number","s_city","s_county","s_state","s_zip"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_17] PartitionCols:_col8 - Merge Join Operator [MERGEJOIN_95] (rows=478292911 width=11) - Conds:RS_14._col1, _col2, _col3=RS_105._col1, _col2, _col4(Inner),Output:["_col0","_col5","_col8"] + Merge Join Operator [MERGEJOIN_100] (rows=478292911 width=11) + Conds:RS_14._col1, _col2, _col3=RS_110._col1, _col2, _col4(Inner),Output:["_col0","_col5","_col8"] <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_105] + SHUFFLE [RS_110] PartitionCols:_col1, _col2, _col4 - Select Operator [SEL_104] (rows=501694138 width=19) + Select Operator [SEL_109] (rows=501694138 width=19) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_103] (rows=501694138 width=19) + Filter Operator [FIL_108] (rows=501694138 width=19) predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_store_sk is not null) TableScan [TS_6] (rows=575995635 width=19) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_14] PartitionCols:_col1, _col2, _col3 - Merge Join Operator [MERGEJOIN_94] (rows=53632139 width=15) - Conds:RS_99._col0=RS_102._col0(Inner),Output:["_col0","_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_99] (rows=53632139 width=15) + Conds:RS_104._col0=RS_107._col0(Inner),Output:["_col0","_col1","_col2","_col3"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_99] + SHUFFLE [RS_104] PartitionCols:_col0 - Select Operator 
[SEL_98] (rows=53632139 width=15) + Select Operator [SEL_103] (rows=53632139 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_97] (rows=53632139 width=15) + Filter Operator [FIL_102] (rows=53632139 width=15) predicate:(sr_customer_sk is not null and sr_returned_date_sk is not null) TableScan [TS_0] (rows=57591150 width=15) default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number"] <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_102] + SHUFFLE [RS_107] PartitionCols:_col0 - Select Operator [SEL_101] (rows=50 width=4) + Select Operator [SEL_106] (rows=50 width=4) Output:["_col0"] - Filter Operator [FIL_100] (rows=50 width=12) + Filter Operator [FIL_105] (rows=50 width=12) predicate:((d_year = 2000) and (d_moy = 9)) TableScan [TS_3] (rows=73049 width=12) default@date_dim,d2,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query60.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query60.q.out index 06a5689938..8a4bca1799 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query60.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query60.q.out @@ -197,228 +197,230 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_358] - Limit [LIM_357] (rows=100 width=212) + File Output Operator [FS_364] + Limit [LIM_363] (rows=100 width=212) Number of rows:100 - Select Operator [SEL_356] (rows=1717 width=212) + Select Operator [SEL_362] (rows=1717 width=212) Output:["_col0","_col1"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_355] - Group By Operator [GBY_354] (rows=1717 width=212) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Union 5 [SIMPLE_EDGE] - <-Reducer 11 [CONTAINS] vectorized - Reduce Output Operator [RS_376] - PartitionCols:_col0 - Group By Operator [GBY_375] (rows=1717 width=212) 
- Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Top N Key Operator [TNK_374] (rows=5151 width=212) - keys:_col0,top n:100 - Group By Operator [GBY_373] (rows=1717 width=212) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_106] - PartitionCols:_col0 - Group By Operator [GBY_105] (rows=99586 width=212) - Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1 - Merge Join Operator [MERGEJOIN_302] (rows=69268204 width=211) - Conds:RS_101._col0=RS_102._col2(Inner),Output:["_col1","_col7"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_101] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_291] (rows=34340 width=104) - Conds:RS_319._col1=RS_325._col0(Inner),Output:["_col0","_col1"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_319] - PartitionCols:_col1 - Select Operator [SEL_318] (rows=462000 width=104) - Output:["_col0","_col1"] - TableScan [TS_0] (rows=462000 width=104) - default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] - <-Reducer 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_325] - PartitionCols:_col0 - Group By Operator [GBY_324] (rows=23100 width=100) - Output:["_col0"],keys:KEY._col0 - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_323] - PartitionCols:_col0 - Group By Operator [GBY_322] (rows=23100 width=100) - Output:["_col0"],keys:i_item_id - Select Operator [SEL_321] (rows=46200 width=190) - Output:["i_item_id"] - Filter Operator [FIL_320] (rows=46200 width=190) - predicate:(i_category = 'Children') - TableScan [TS_2] (rows=462000 width=190) - default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_id","i_category"] - <-Reducer 23 [SIMPLE_EDGE] - SHUFFLE [RS_102] - PartitionCols:_col2 - Select Operator [SEL_97] (rows=143931246 width=115) - Output:["_col2","_col4"] - Merge Join Operator [MERGEJOIN_299] (rows=143931246 width=115) - Conds:RS_94._col2=RS_349._col0(Inner),Output:["_col1","_col3"] - <-Map 25 [SIMPLE_EDGE] vectorized - 
SHUFFLE [RS_349] - PartitionCols:_col0 - Select Operator [SEL_346] (rows=8000000 width=4) - Output:["_col0"] - Filter Operator [FIL_345] (rows=8000000 width=112) - predicate:(ca_gmt_offset = -6) - TableScan [TS_15] (rows=40000000 width=112) - default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_gmt_offset"] - <-Reducer 22 [SIMPLE_EDGE] - SHUFFLE [RS_94] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_298] (rows=143931246 width=119) - Conds:RS_372._col0=RS_332._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 17 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_332] - PartitionCols:_col0 - Select Operator [SEL_327] (rows=50 width=4) - Output:["_col0"] - Filter Operator [FIL_326] (rows=50 width=12) - predicate:((d_year = 1999) and (d_moy = 9)) - TableScan [TS_12] (rows=73049 width=12) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_372] - PartitionCols:_col0 - Select Operator [SEL_371] (rows=143931246 width=123) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_370] (rows=143931246 width=123) - predicate:(ws_sold_date_sk is not null and ws_bill_addr_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_92_date_dim_d_date_sk_min) AND DynamicValue(RS_92_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_92_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_82] (rows=144002668 width=123) - default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_addr_sk","ws_ext_sales_price"] - <-Reducer 24 [BROADCAST_EDGE] vectorized - BROADCAST [RS_369] - Group By Operator [GBY_368] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_339] - Group By Operator [GBY_336] 
(rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_333] (rows=50 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_327] - <-Reducer 4 [CONTAINS] vectorized - Reduce Output Operator [RS_353] - PartitionCols:_col0 - Group By Operator [GBY_352] (rows=1717 width=212) - Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Top N Key Operator [TNK_351] (rows=5151 width=212) - keys:_col0,top n:100 - Group By Operator [GBY_350] (rows=1717 width=212) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_33] - PartitionCols:_col0 - Group By Operator [GBY_32] (rows=343400 width=212) - Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1 - Merge Join Operator [MERGEJOIN_300] (rows=252818424 width=201) - Conds:RS_28._col0=RS_29._col2(Inner),Output:["_col1","_col7"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_28] - PartitionCols:_col0 - Please refer to the previous Merge Join Operator [MERGEJOIN_291] - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_29] - PartitionCols:_col2 - Select Operator [SEL_24] (rows=525327191 width=110) - Output:["_col2","_col4"] - Merge Join Operator [MERGEJOIN_293] (rows=525327191 width=110) - Conds:RS_21._col2=RS_347._col0(Inner),Output:["_col1","_col3"] - <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_347] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_346] - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_21] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_292] (rows=525327191 width=114) - Conds:RS_344._col0=RS_328._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 17 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_328] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_327] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_344] - PartitionCols:_col0 - Select Operator 
[SEL_343] (rows=525327191 width=118) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_342] (rows=525327191 width=118) - predicate:(ss_sold_date_sk is not null and ss_addr_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_19_date_dim_d_date_sk_min) AND DynamicValue(RS_19_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_19_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_9] (rows=575995635 width=118) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_addr_sk","ss_ext_sales_price"] - <-Reducer 18 [BROADCAST_EDGE] vectorized - BROADCAST [RS_341] - Group By Operator [GBY_340] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_337] - Group By Operator [GBY_334] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_329] (rows=50 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_327] - <-Reducer 9 [CONTAINS] vectorized - Reduce Output Operator [RS_367] - PartitionCols:_col0 - Group By Operator [GBY_366] (rows=1717 width=212) - Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Top N Key Operator [TNK_365] (rows=5151 width=212) - keys:_col0,top n:100 - Group By Operator [GBY_364] (rows=1717 width=212) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_69] - PartitionCols:_col0 - Group By Operator [GBY_68] (rows=195738 width=212) - Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1 - Merge Join Operator [MERGEJOIN_301] (rows=137215467 width=210) - Conds:RS_64._col0=RS_65._col3(Inner),Output:["_col1","_col7"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_64] - 
PartitionCols:_col0 - Please refer to the previous Merge Join Operator [MERGEJOIN_291] - <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_65] - PartitionCols:_col3 - Select Operator [SEL_60] (rows=285117733 width=115) - Output:["_col3","_col4"] - Merge Join Operator [MERGEJOIN_296] (rows=285117733 width=115) - Conds:RS_57._col1=RS_348._col0(Inner),Output:["_col2","_col3"] - <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_348] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_346] - <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_57] + SHUFFLE [RS_361] + Top N Key Operator [TNK_360] (rows=1717 width=212) + keys:_col0, _col1,top n:100 + Group By Operator [GBY_359] (rows=1717 width=212) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Union 5 [SIMPLE_EDGE] + <-Reducer 11 [CONTAINS] vectorized + Reduce Output Operator [RS_382] + PartitionCols:_col0 + Group By Operator [GBY_381] (rows=1717 width=212) + Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 + Top N Key Operator [TNK_380] (rows=5151 width=212) + keys:_col0,top n:100 + Group By Operator [GBY_379] (rows=1717 width=212) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_106] + PartitionCols:_col0 + Group By Operator [GBY_105] (rows=99586 width=212) + Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1 + Merge Join Operator [MERGEJOIN_307] (rows=69268204 width=211) + Conds:RS_101._col0=RS_102._col2(Inner),Output:["_col1","_col7"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_101] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_296] (rows=34340 width=104) + Conds:RS_324._col1=RS_330._col0(Inner),Output:["_col0","_col1"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_324] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_295] (rows=285117733 width=119) - Conds:RS_363._col0=RS_330._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 17 [SIMPLE_EDGE] vectorized - 
PARTITION_ONLY_SHUFFLE [RS_330] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_327] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_363] + Select Operator [SEL_323] (rows=462000 width=104) + Output:["_col0","_col1"] + TableScan [TS_0] (rows=462000 width=104) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] + <-Reducer 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_330] + PartitionCols:_col0 + Group By Operator [GBY_329] (rows=23100 width=100) + Output:["_col0"],keys:KEY._col0 + <-Map 12 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_328] PartitionCols:_col0 - Select Operator [SEL_362] (rows=285117733 width=123) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_361] (rows=285117733 width=123) - predicate:(cs_sold_date_sk is not null and cs_bill_addr_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_55_date_dim_d_date_sk_min) AND DynamicValue(RS_55_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_55_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_45] (rows=287989836 width=123) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_addr_sk","cs_item_sk","cs_ext_sales_price"] - <-Reducer 21 [BROADCAST_EDGE] vectorized - BROADCAST [RS_360] - Group By Operator [GBY_359] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_338] - Group By Operator [GBY_335] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_331] (rows=50 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_327] + Group By Operator [GBY_327] (rows=23100 width=100) + Output:["_col0"],keys:i_item_id + Select Operator [SEL_326] (rows=46200 width=190) + 
Output:["i_item_id"] + Filter Operator [FIL_325] (rows=46200 width=190) + predicate:(i_category = 'Children') + TableScan [TS_2] (rows=462000 width=190) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_id","i_category"] + <-Reducer 23 [SIMPLE_EDGE] + SHUFFLE [RS_102] + PartitionCols:_col2 + Select Operator [SEL_97] (rows=143931246 width=115) + Output:["_col2","_col4"] + Merge Join Operator [MERGEJOIN_304] (rows=143931246 width=115) + Conds:RS_94._col2=RS_354._col0(Inner),Output:["_col1","_col3"] + <-Map 25 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_354] + PartitionCols:_col0 + Select Operator [SEL_351] (rows=8000000 width=4) + Output:["_col0"] + Filter Operator [FIL_350] (rows=8000000 width=112) + predicate:(ca_gmt_offset = -6) + TableScan [TS_15] (rows=40000000 width=112) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_gmt_offset"] + <-Reducer 22 [SIMPLE_EDGE] + SHUFFLE [RS_94] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_303] (rows=143931246 width=119) + Conds:RS_378._col0=RS_337._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 17 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_337] + PartitionCols:_col0 + Select Operator [SEL_332] (rows=50 width=4) + Output:["_col0"] + Filter Operator [FIL_331] (rows=50 width=12) + predicate:((d_year = 1999) and (d_moy = 9)) + TableScan [TS_12] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_378] + PartitionCols:_col0 + Select Operator [SEL_377] (rows=143931246 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_376] (rows=143931246 width=123) + predicate:(ws_sold_date_sk is not null and ws_bill_addr_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_92_date_dim_d_date_sk_min) AND DynamicValue(RS_92_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, 
DynamicValue(RS_92_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_82] (rows=144002668 width=123) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_addr_sk","ws_ext_sales_price"] + <-Reducer 24 [BROADCAST_EDGE] vectorized + BROADCAST [RS_375] + Group By Operator [GBY_374] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_344] + Group By Operator [GBY_341] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_338] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_332] + <-Reducer 4 [CONTAINS] vectorized + Reduce Output Operator [RS_358] + PartitionCols:_col0 + Group By Operator [GBY_357] (rows=1717 width=212) + Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 + Top N Key Operator [TNK_356] (rows=5151 width=212) + keys:_col0,top n:100 + Group By Operator [GBY_355] (rows=1717 width=212) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_33] + PartitionCols:_col0 + Group By Operator [GBY_32] (rows=343400 width=212) + Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1 + Merge Join Operator [MERGEJOIN_305] (rows=252818424 width=201) + Conds:RS_28._col0=RS_29._col2(Inner),Output:["_col1","_col7"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_28] + PartitionCols:_col0 + Please refer to the previous Merge Join Operator [MERGEJOIN_296] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_29] + PartitionCols:_col2 + Select Operator [SEL_24] (rows=525327191 width=110) + Output:["_col2","_col4"] + Merge Join Operator [MERGEJOIN_298] (rows=525327191 width=110) + 
Conds:RS_21._col2=RS_352._col0(Inner),Output:["_col1","_col3"] + <-Map 25 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_352] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_351] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_297] (rows=525327191 width=114) + Conds:RS_349._col0=RS_333._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 17 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_333] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_332] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_349] + PartitionCols:_col0 + Select Operator [SEL_348] (rows=525327191 width=118) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_347] (rows=525327191 width=118) + predicate:(ss_sold_date_sk is not null and ss_addr_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_19_date_dim_d_date_sk_min) AND DynamicValue(RS_19_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_19_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_9] (rows=575995635 width=118) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_addr_sk","ss_ext_sales_price"] + <-Reducer 18 [BROADCAST_EDGE] vectorized + BROADCAST [RS_346] + Group By Operator [GBY_345] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_342] + Group By Operator [GBY_339] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_334] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_332] + <-Reducer 9 [CONTAINS] vectorized + Reduce Output Operator [RS_373] + PartitionCols:_col0 + Group By Operator [GBY_372] (rows=1717 
width=212) + Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 + Top N Key Operator [TNK_371] (rows=5151 width=212) + keys:_col0,top n:100 + Group By Operator [GBY_370] (rows=1717 width=212) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_69] + PartitionCols:_col0 + Group By Operator [GBY_68] (rows=195738 width=212) + Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1 + Merge Join Operator [MERGEJOIN_306] (rows=137215467 width=210) + Conds:RS_64._col0=RS_65._col3(Inner),Output:["_col1","_col7"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_64] + PartitionCols:_col0 + Please refer to the previous Merge Join Operator [MERGEJOIN_296] + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_65] + PartitionCols:_col3 + Select Operator [SEL_60] (rows=285117733 width=115) + Output:["_col3","_col4"] + Merge Join Operator [MERGEJOIN_301] (rows=285117733 width=115) + Conds:RS_57._col1=RS_353._col0(Inner),Output:["_col2","_col3"] + <-Map 25 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_353] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_351] + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_57] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_300] (rows=285117733 width=119) + Conds:RS_369._col0=RS_335._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 17 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_335] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_332] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_369] + PartitionCols:_col0 + Select Operator [SEL_368] (rows=285117733 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_367] (rows=285117733 width=123) + predicate:(cs_sold_date_sk is not null and cs_bill_addr_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_55_date_dim_d_date_sk_min) AND DynamicValue(RS_55_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, 
DynamicValue(RS_55_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_45] (rows=287989836 width=123) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_addr_sk","cs_item_sk","cs_ext_sales_price"] + <-Reducer 21 [BROADCAST_EDGE] vectorized + BROADCAST [RS_366] + Group By Operator [GBY_365] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_343] + Group By Operator [GBY_340] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_336] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_332] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query66.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query66.q.out index be612609cf..a27a3585d9 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query66.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query66.q.out @@ -479,28 +479,28 @@ Stage-0 limit:-1 Stage-1 Reducer 9 vectorized - File Output Operator [FS_251] - Select Operator [SEL_250] (rows=100 width=4614) + File Output Operator [FS_256] + Select Operator [SEL_255] (rows=100 width=4614) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41","_col42","_col43"] - Limit [LIM_249] (rows=100 width=4510) + Limit [LIM_254] (rows=100 width=4510) Number of rows:100 - Select Operator [SEL_248] (rows=2423925 width=4510) + Select Operator 
[SEL_253] (rows=2423925 width=4510) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"] <-Reducer 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_247] - Group By Operator [GBY_246] (rows=2423925 width=4510) + SHUFFLE [RS_252] + Group By Operator [GBY_251] (rows=2423925 width=4510) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)","sum(VALUE._col7)","sum(VALUE._col8)","sum(VALUE._col9)","sum(VALUE._col10)","sum(VALUE._col11)","sum(VALUE._col12)","sum(VALUE._col13)","sum(VALUE._col14)","sum(VALUE._col15)","sum(VALUE._col16)","sum(VALUE._col17)","sum(VALUE._col18)","sum(VALUE._col19)","sum(VALUE._col20)","sum(VALUE._col21)","sum(VALUE._col22)","sum(VALUE._col23)","sum(VALUE._col24)","sum(VALUE._col25)","sum(VALUE._col26)","sum(VALUE._col27)","sum(VALUE._col28)","sum(VALUE._col29)","sum(VALUE._col30)","sum(VALUE._col31)","sum(VALUE._col32)","sum(VALUE._col33)","sum(VALUE._col34)","sum(VALUE._col35)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 <-Union 7 [SIMPLE_EDGE] <-Reducer 15 [CONTAINS] vectorized - Reduce Output Operator [RS_261] + Reduce Output Operator [RS_266] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 - Group By Operator [GBY_260] (rows=2513727 
width=4510) + Group By Operator [GBY_265] (rows=2513727 width=4510) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)","sum(_col9)","sum(_col10)","sum(_col11)","sum(_col12)","sum(_col13)","sum(_col14)","sum(_col15)","sum(_col16)","sum(_col17)","sum(_col18)","sum(_col19)","sum(_col20)","sum(_col21)","sum(_col22)","sum(_col23)","sum(_col24)","sum(_col25)","sum(_col26)","sum(_col27)","sum(_col28)","sum(_col29)","sum(_col30)","sum(_col31)","sum(_col32)","sum(_col33)","sum(_col34)","sum(_col35)","sum(_col36)","sum(_col37)","sum(_col38)","sum(_col39)","sum(_col40)","sum(_col41)"],keys:_col0, _col1, _col2, _col3, _col4, _col5 - Top N Key Operator [TNK_259] (rows=2513727 width=3166) - keys:_col0, _col1, _col2, _col3, _col4, _col5,top n:100 - Select Operator [SEL_258] (rows=2513727 width=3166) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"] - Group By Operator [GBY_257] (rows=2513700 width=3166) + Select Operator [SEL_264] (rows=2513727 width=3166) + 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"] + Top N Key Operator [TNK_263] (rows=2513727 width=3166) + keys:_col0, _col1, _col2, _col3, _col4, _col5,top n:100 + Group By Operator [GBY_262] (rows=2513700 width=3166) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)","sum(VALUE._col7)","sum(VALUE._col8)","sum(VALUE._col9)","sum(VALUE._col10)","sum(VALUE._col11)","sum(VALUE._col12)","sum(VALUE._col13)","sum(VALUE._col14)","sum(VALUE._col15)","sum(VALUE._col16)","sum(VALUE._col17)","sum(VALUE._col18)","sum(VALUE._col19)","sum(VALUE._col20)","sum(VALUE._col21)","sum(VALUE._col22)","sum(VALUE._col23)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_61] @@ -509,87 +509,87 @@ Stage-0 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)","sum(_col9)","sum(_col10)","sum(_col11)","sum(_col12)","sum(_col13)","sum(_col14)","sum(_col15)","sum(_col16)","sum(_col17)","sum(_col18)","sum(_col19)","sum(_col20)","sum(_col21)","sum(_col22)","sum(_col23)","sum(_col24)","sum(_col25)","sum(_col26)","sum(_col27)","sum(_col28)","sum(_col29)"],keys:_col0, _col1, _col2, _col3, _col4, _col5 Select Operator [SEL_58] (rows=15681803 width=750) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"] - Merge Join Operator [MERGEJOIN_202] (rows=15681803 width=750) - Conds:RS_55._col3=RS_240._col0(Inner),Output:["_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col22","_col23","_col24","_col25","_col26","_col27"] + Merge Join Operator [MERGEJOIN_207] (rows=15681803 width=750) + Conds:RS_55._col3=RS_245._col0(Inner),Output:["_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col22","_col23","_col24","_col25","_col26","_col27"] <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_240] + SHUFFLE [RS_245] PartitionCols:_col0 - Select Operator [SEL_238] (rows=27 width=482) + Select Operator [SEL_243] (rows=27 width=482) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] TableScan [TS_12] (rows=27 width=482) 
default@warehouse,warehouse,Tbl:COMPLETE,Col:COMPLETE,Output:["w_warehouse_sk","w_warehouse_name","w_warehouse_sq_ft","w_city","w_county","w_state","w_country"] <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_55] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_201] (rows=15681803 width=275) - Conds:RS_52._col2=RS_219._col0(Inner),Output:["_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"] + Merge Join Operator [MERGEJOIN_206] (rows=15681803 width=275) + Conds:RS_52._col2=RS_224._col0(Inner),Output:["_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"] <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_219] + SHUFFLE [RS_224] PartitionCols:_col0 - Select Operator [SEL_216] (rows=1 width=4) + Select Operator [SEL_221] (rows=1 width=4) Output:["_col0"] - Filter Operator [FIL_215] (rows=1 width=88) + Filter Operator [FIL_220] (rows=1 width=88) predicate:(sm_carrier) IN ('DIAMOND', 'AIRBORNE') TableScan [TS_9] (rows=1 width=88) default@ship_mode,ship_mode,Tbl:COMPLETE,Col:COMPLETE,Output:["sm_ship_mode_sk","sm_carrier"] <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_52] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_200] (rows=282272460 width=279) - Conds:RS_49._col0=RS_237._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"] + Merge Join Operator [MERGEJOIN_205] (rows=282272460 width=279) + Conds:RS_49._col0=RS_242._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"] <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_237] + SHUFFLE [RS_242] PartitionCols:_col0 - Select Operator [SEL_235] (rows=652 width=52) + Select Operator [SEL_240] (rows=652 width=52) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] - Filter Operator [FIL_234] (rows=652 width=12) + Filter Operator [FIL_239] (rows=652 width=12) predicate:(d_year = 2002) TableScan [TS_6] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_49] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_199] (rows=282272460 width=235) - Conds:RS_256._col1=RS_233._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_204] (rows=282272460 width=235) + Conds:RS_261._col1=RS_238._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_233] + SHUFFLE [RS_238] PartitionCols:_col0 - Select Operator [SEL_231] (rows=33426 width=4) + Select Operator [SEL_236] (rows=33426 width=4) Output:["_col0"] - Filter Operator [FIL_230] (rows=33426 width=8) + Filter Operator [FIL_235] (rows=33426 width=8) predicate:t_time BETWEEN 49530 AND 78330 TableScan [TS_3] (rows=86400 width=8) default@time_dim,time_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["t_time_sk","t_time"] <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_256] + SHUFFLE [RS_261] PartitionCols:_col1 - Select Operator [SEL_255] (rows=282272460 width=239) + Select Operator [SEL_260] (rows=282272460 width=239) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_254] (rows=282272460 width=243) + Filter Operator [FIL_259] (rows=282272460 width=243) predicate:(cs_warehouse_sk is not null and cs_sold_date_sk is not null and cs_sold_time_sk is not null and cs_ship_mode_sk is not null and cs_ship_mode_sk BETWEEN DynamicValue(RS_53_ship_mode_sm_ship_mode_sk_min) AND DynamicValue(RS_53_ship_mode_sm_ship_mode_sk_max) and in_bloom_filter(cs_ship_mode_sk, DynamicValue(RS_53_ship_mode_sm_ship_mode_sk_bloom_filter))) TableScan [TS_32] (rows=287989836 width=243) 
default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_sold_time_sk","cs_ship_mode_sk","cs_warehouse_sk","cs_quantity","cs_ext_sales_price","cs_net_paid_inc_ship_tax"] <-Reducer 19 [BROADCAST_EDGE] vectorized - BROADCAST [RS_253] - Group By Operator [GBY_252] (rows=1 width=12) + BROADCAST [RS_258] + Group By Operator [GBY_257] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_224] - Group By Operator [GBY_222] (rows=1 width=12) + SHUFFLE [RS_229] + Group By Operator [GBY_227] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_220] (rows=1 width=4) + Select Operator [SEL_225] (rows=1 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_216] + Please refer to the previous Select Operator [SEL_221] <-Reducer 6 [CONTAINS] vectorized - Reduce Output Operator [RS_245] + Reduce Output Operator [RS_250] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 - Group By Operator [GBY_244] (rows=2513727 width=4510) + Group By Operator [GBY_249] (rows=2513727 width=4510) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)","sum(_col9)","sum(_col10)","sum(_col11)","sum(_col12)","sum(_col13)","sum(_col14)","sum(_col15)","sum(_col16)","sum(_col17)","sum(_col18)","sum(_col19)","sum(_col20)","sum(_col21)","sum(_col22)","sum(_col23)","sum(_col24)","sum(_col25)","sum(_col26)","sum(_col27)","sum(_col28)","sum(_col29)","sum(_col30)","sum(_col31)","sum(_col32)","sum(_col33)","sum(_col34)","sum(_col35)","sum(_col36)","sum(_col37)","sum(_col38)","sum(_col39)","sum(_col40)","sum(_col41)"],keys:_col0, _col1, _col2, _col3, _col4, _col5 - Top N Key Operator [TNK_243] (rows=2513727 width=3166) - keys:_col0, _col1, _col2, _col3, _col4, _col5,top n:100 - Select Operator [SEL_242] (rows=2513727 width=3166) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"] - Group By Operator [GBY_241] (rows=27 width=3166) + Select Operator [SEL_248] (rows=2513727 width=3166) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"] + Top N Key Operator 
[TNK_247] (rows=2513727 width=3166) + keys:_col0, _col1, _col2, _col3, _col4, _col5,top n:100 + Group By Operator [GBY_246] (rows=27 width=3166) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)","sum(VALUE._col7)","sum(VALUE._col8)","sum(VALUE._col9)","sum(VALUE._col10)","sum(VALUE._col11)","sum(VALUE._col12)","sum(VALUE._col13)","sum(VALUE._col14)","sum(VALUE._col15)","sum(VALUE._col16)","sum(VALUE._col17)","sum(VALUE._col18)","sum(VALUE._col19)","sum(VALUE._col20)","sum(VALUE._col21)","sum(VALUE._col22)","sum(VALUE._col23)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_29] @@ -598,57 +598,57 @@ Stage-0 Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)","sum(_col9)","sum(_col10)","sum(_col11)","sum(_col12)","sum(_col13)","sum(_col14)","sum(_col15)","sum(_col16)","sum(_col17)","sum(_col18)","sum(_col19)","sum(_col20)","sum(_col21)","sum(_col22)","sum(_col23)","sum(_col24)","sum(_col25)","sum(_col26)","sum(_col27)","sum(_col28)","sum(_col29)"],keys:_col0, _col1, _col2, _col3, _col4, _col5 Select Operator [SEL_26] (rows=7992175 width=750) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"] - Merge Join Operator [MERGEJOIN_198] (rows=7992175 width=750) - Conds:RS_23._col3=RS_239._col0(Inner),Output:["_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col22","_col23","_col24","_col25","_col26","_col27"] + Merge Join Operator [MERGEJOIN_203] (rows=7992175 width=750) + Conds:RS_23._col3=RS_244._col0(Inner),Output:["_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col22","_col23","_col24","_col25","_col26","_col27"] <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_239] + SHUFFLE [RS_244] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_238] + Please refer to the previous Select Operator [SEL_243] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_23] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_197] (rows=7992175 width=275) - Conds:RS_20._col2=RS_217._col0(Inner),Output:["_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"] + Merge Join Operator [MERGEJOIN_202] (rows=7992175 width=275) + Conds:RS_20._col2=RS_222._col0(Inner),Output:["_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"] <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_217] + SHUFFLE [RS_222] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_216] + Please refer to the previous Select Operator [SEL_221] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_20] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_196] (rows=143859154 width=279) - 
Conds:RS_17._col0=RS_236._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"] + Merge Join Operator [MERGEJOIN_201] (rows=143859154 width=279) + Conds:RS_17._col0=RS_241._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"] <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_236] + SHUFFLE [RS_241] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_235] + Please refer to the previous Select Operator [SEL_240] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_17] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_195] (rows=143859154 width=235) - Conds:RS_229._col1=RS_232._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_200] (rows=143859154 width=235) + Conds:RS_234._col1=RS_237._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_232] + SHUFFLE [RS_237] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_231] + Please refer to the previous Select Operator [SEL_236] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_229] + SHUFFLE [RS_234] PartitionCols:_col1 - Select Operator [SEL_228] (rows=143859154 width=239) + Select Operator [SEL_233] (rows=143859154 width=239) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_227] (rows=143859154 width=243) + Filter Operator [FIL_232] (rows=143859154 width=243) predicate:(ws_sold_time_sk is not null and ws_warehouse_sk is not null and ws_sold_date_sk is not null and ws_ship_mode_sk is not null and ws_ship_mode_sk BETWEEN DynamicValue(RS_21_ship_mode_sm_ship_mode_sk_min) AND DynamicValue(RS_21_ship_mode_sm_ship_mode_sk_max) and in_bloom_filter(ws_ship_mode_sk, DynamicValue(RS_21_ship_mode_sm_ship_mode_sk_bloom_filter))) TableScan [TS_0] (rows=144002668 width=243) 
default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_sold_time_sk","ws_ship_mode_sk","ws_warehouse_sk","ws_quantity","ws_sales_price","ws_net_paid_inc_tax"] <-Reducer 18 [BROADCAST_EDGE] vectorized - BROADCAST [RS_226] - Group By Operator [GBY_225] (rows=1 width=12) + BROADCAST [RS_231] + Group By Operator [GBY_230] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_223] - Group By Operator [GBY_221] (rows=1 width=12) + SHUFFLE [RS_228] + Group By Operator [GBY_226] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_218] (rows=1 width=4) + Select Operator [SEL_223] (rows=1 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_216] + Please refer to the previous Select Operator [SEL_221] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query69.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query69.q.out index d7469ae5a9..57d36f5063 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query69.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query69.q.out @@ -133,30 +133,30 @@ Stage-0 limit:100 Stage-1 Reducer 8 vectorized - File Output Operator [FS_229] - Limit [LIM_228] (rows=1 width=383) + File Output Operator [FS_234] + Limit [LIM_233] (rows=1 width=383) Number of rows:100 - Select Operator [SEL_227] (rows=1 width=383) + Select Operator [SEL_232] (rows=1 width=383) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Reducer 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_226] - Select Operator [SEL_225] (rows=1 width=383) + SHUFFLE [RS_231] + Select Operator [SEL_230] (rows=1 width=383) Output:["_col0","_col1","_col2","_col3","_col4","_col6"] - Group By Operator 
[GBY_224] (rows=1 width=367) + Group By Operator [GBY_229] (rows=1 width=367) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_68] PartitionCols:_col0, _col1, _col2, _col3, _col4 Group By Operator [GBY_67] (rows=1 width=367) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count()"],keys:_col6, _col7, _col8, _col9, _col10 - Top N Key Operator [TNK_104] (rows=1 width=363) - keys:_col6, _col7, _col8, _col9, _col10,top n:100 - Select Operator [SEL_66] (rows=1 width=363) - Output:["_col6","_col7","_col8","_col9","_col10"] + Select Operator [SEL_66] (rows=1 width=363) + Output:["_col6","_col7","_col8","_col9","_col10"] + Top N Key Operator [TNK_107] (rows=1 width=363) + keys:_col6, _col7, _col8, _col9, _col10,top n:100 Filter Operator [FIL_65] (rows=1 width=363) predicate:_col13 is null - Merge Join Operator [MERGEJOIN_183] (rows=1401496 width=363) - Conds:RS_62._col0=RS_223._col1(Left Outer),Output:["_col6","_col7","_col8","_col9","_col10","_col13"] + Merge Join Operator [MERGEJOIN_188] (rows=1401496 width=363) + Conds:RS_62._col0=RS_228._col1(Left Outer),Output:["_col6","_col7","_col8","_col9","_col10","_col13"] <-Reducer 5 [SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_62] PartitionCols:_col0 @@ -164,55 +164,55 @@ Stage-0 Output:["_col0","_col6","_col7","_col8","_col9","_col10"] Filter Operator [FIL_46] (rows=1 width=367) predicate:_col11 is null - Merge Join Operator [MERGEJOIN_182] (rows=1414922 width=367) - Conds:RS_43._col0=RS_215._col1(Left Outer),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11"] + Merge Join Operator [MERGEJOIN_187] (rows=1414922 width=367) + Conds:RS_43._col0=RS_220._col1(Left Outer),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11"] <-Reducer 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_215] + SHUFFLE [RS_220] PartitionCols:_col1 - Select Operator [SEL_214] 
(rows=1414922 width=7) + Select Operator [SEL_219] (rows=1414922 width=7) Output:["_col0","_col1"] - Group By Operator [GBY_213] (rows=1414922 width=3) + Group By Operator [GBY_218] (rows=1414922 width=3) Output:["_col0"],keys:KEY._col0 <-Reducer 16 [SIMPLE_EDGE] SHUFFLE [RS_29] PartitionCols:_col0 Group By Operator [GBY_28] (rows=143930993 width=3) Output:["_col0"],keys:_col1 - Merge Join Operator [MERGEJOIN_179] (rows=143930993 width=3) - Conds:RS_212._col0=RS_196._col0(Inner),Output:["_col1"] + Merge Join Operator [MERGEJOIN_184] (rows=143930993 width=3) + Conds:RS_217._col0=RS_201._col0(Inner),Output:["_col1"] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_196] + SHUFFLE [RS_201] PartitionCols:_col0 - Select Operator [SEL_193] (rows=150 width=4) + Select Operator [SEL_198] (rows=150 width=4) Output:["_col0"] - Filter Operator [FIL_192] (rows=150 width=12) + Filter Operator [FIL_197] (rows=150 width=12) predicate:((d_year = 1999) and d_moy BETWEEN 1 AND 3) TableScan [TS_11] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_212] + SHUFFLE [RS_217] PartitionCols:_col0 - Select Operator [SEL_211] (rows=143930993 width=7) + Select Operator [SEL_216] (rows=143930993 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_210] (rows=143930993 width=7) + Filter Operator [FIL_215] (rows=143930993 width=7) predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_25_date_dim_d_date_sk_min) AND DynamicValue(RS_25_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_25_date_dim_d_date_sk_bloom_filter))) TableScan [TS_18] (rows=144002668 width=7) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] <-Reducer 18 [BROADCAST_EDGE] vectorized - BROADCAST [RS_209] - Group By Operator [GBY_208] (rows=1 width=12) + BROADCAST [RS_214] + 
Group By Operator [GBY_213] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_202] - Group By Operator [GBY_200] (rows=1 width=12) + SHUFFLE [RS_207] + Group By Operator [GBY_205] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_197] (rows=150 width=4) + Select Operator [SEL_202] (rows=150 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_193] + Please refer to the previous Select Operator [SEL_198] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_43] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_181] (rows=525327388 width=363) + Merge Join Operator [MERGEJOIN_186] (rows=525327388 width=363) Conds:RS_40._col0=RS_41._col0(Left Semi),Output:["_col0","_col6","_col7","_col8","_col9","_col10"] <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_41] @@ -221,103 +221,103 @@ Stage-0 Output:["_col0"],keys:_col0 Select Operator [SEL_17] (rows=525327388 width=3) Output:["_col0"] - Merge Join Operator [MERGEJOIN_178] (rows=525327388 width=3) - Conds:RS_207._col0=RS_194._col0(Inner),Output:["_col1"] + Merge Join Operator [MERGEJOIN_183] (rows=525327388 width=3) + Conds:RS_212._col0=RS_199._col0(Inner),Output:["_col1"] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_194] + SHUFFLE [RS_199] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_193] + Please refer to the previous Select Operator [SEL_198] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_207] + SHUFFLE [RS_212] PartitionCols:_col0 - Select Operator [SEL_206] (rows=525327388 width=7) + Select Operator [SEL_211] (rows=525327388 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_205] (rows=525327388 width=7) + Filter Operator [FIL_210] (rows=525327388 width=7) predicate:(ss_sold_date_sk is not null and 
ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_15_date_dim_d_date_sk_min) AND DynamicValue(RS_15_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_15_date_dim_d_date_sk_bloom_filter))) TableScan [TS_8] (rows=575995635 width=7) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk"] <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_204] - Group By Operator [GBY_203] (rows=1 width=12) + BROADCAST [RS_209] + Group By Operator [GBY_208] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_201] - Group By Operator [GBY_199] (rows=1 width=12) + SHUFFLE [RS_206] + Group By Operator [GBY_204] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_195] (rows=150 width=4) + Select Operator [SEL_200] (rows=150 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_193] + Please refer to the previous Select Operator [SEL_198] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_40] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_177] (rows=4605476 width=363) - Conds:RS_35._col1=RS_191._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10"] + Merge Join Operator [MERGEJOIN_182] (rows=4605476 width=363) + Conds:RS_35._col1=RS_196._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10"] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_191] + SHUFFLE [RS_196] PartitionCols:_col0 - Select Operator [SEL_190] (rows=1861800 width=363) + Select Operator [SEL_195] (rows=1861800 width=363) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] TableScan [TS_6] (rows=1861800 width=363) 
default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status","cd_purchase_estimate","cd_credit_rating"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_35] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_176] (rows=4541258 width=5) - Conds:RS_186._col2=RS_189._col0(Inner),Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_181] (rows=4541258 width=5) + Conds:RS_191._col2=RS_194._col0(Inner),Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_186] + SHUFFLE [RS_191] PartitionCols:_col2 - Select Operator [SEL_185] (rows=77201384 width=11) + Select Operator [SEL_190] (rows=77201384 width=11) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_184] (rows=77201384 width=11) + Filter Operator [FIL_189] (rows=77201384 width=11) predicate:(c_current_cdemo_sk is not null and c_current_addr_sk is not null) TableScan [TS_0] (rows=80000000 width=11) default@customer,c,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_189] + SHUFFLE [RS_194] PartitionCols:_col0 - Select Operator [SEL_188] (rows=2352941 width=90) + Select Operator [SEL_193] (rows=2352941 width=90) Output:["_col0"] - Filter Operator [FIL_187] (rows=2352941 width=90) + Filter Operator [FIL_192] (rows=2352941 width=90) predicate:(ca_state) IN ('CO', 'IL', 'MN') TableScan [TS_3] (rows=40000000 width=90) default@customer_address,ca,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] <-Reducer 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_223] + SHUFFLE [RS_228] PartitionCols:_col1 - Select Operator [SEL_222] (rows=1401496 width=7) + Select Operator [SEL_227] (rows=1401496 width=7) Output:["_col0","_col1"] - Group By Operator [GBY_221] (rows=1401496 width=3) + Group By Operator [GBY_226] (rows=1401496 width=3) Output:["_col0"],keys:KEY._col0 <-Reducer 19 [SIMPLE_EDGE] SHUFFLE [RS_59] PartitionCols:_col0 Group 
By Operator [GBY_58] (rows=285115246 width=3) Output:["_col0"],keys:_col1 - Merge Join Operator [MERGEJOIN_180] (rows=285115246 width=3) - Conds:RS_220._col0=RS_198._col0(Inner),Output:["_col1"] + Merge Join Operator [MERGEJOIN_185] (rows=285115246 width=3) + Conds:RS_225._col0=RS_203._col0(Inner),Output:["_col1"] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_198] + SHUFFLE [RS_203] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_193] + Please refer to the previous Select Operator [SEL_198] <-Map 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_220] + SHUFFLE [RS_225] PartitionCols:_col0 - Select Operator [SEL_219] (rows=285115246 width=7) + Select Operator [SEL_224] (rows=285115246 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_218] (rows=285115246 width=7) + Filter Operator [FIL_223] (rows=285115246 width=7) predicate:(cs_ship_customer_sk is not null and cs_sold_date_sk is not null and cs_ship_customer_sk BETWEEN DynamicValue(RS_62_c_c_customer_sk_min) AND DynamicValue(RS_62_c_c_customer_sk_max) and in_bloom_filter(cs_ship_customer_sk, DynamicValue(RS_62_c_c_customer_sk_bloom_filter))) TableScan [TS_48] (rows=287989836 width=7) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_ship_customer_sk"] <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_217] - Group By Operator [GBY_216] (rows=1 width=12) + BROADCAST [RS_222] + Group By Operator [GBY_221] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Reducer 5 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_167] - Group By Operator [GBY_166] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_172] + Group By Operator [GBY_171] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_165] (rows=1 width=4) + Select Operator [SEL_170] 
(rows=1 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_47] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query7.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query7.q.out index b2eccdbe90..7dc1bf6c3e 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query7.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query7.q.out @@ -67,93 +67,93 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_123] - Limit [LIM_122] (rows=100 width=444) + File Output Operator [FS_128] + Limit [LIM_127] (rows=100 width=444) Number of rows:100 - Select Operator [SEL_121] (rows=310774 width=444) + Select Operator [SEL_126] (rows=310774 width=444) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_120] - Select Operator [SEL_119] (rows=310774 width=444) + SHUFFLE [RS_125] + Select Operator [SEL_124] (rows=310774 width=444) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_118] (rows=310774 width=476) + Group By Operator [GBY_123] (rows=310774 width=476) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)","sum(VALUE._col6)","count(VALUE._col7)"],keys:KEY._col0 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_28] PartitionCols:_col0 Group By Operator [GBY_27] (rows=462000 width=476) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(_col4)","count(_col4)","sum(_col5)","count(_col5)","sum(_col7)","count(_col7)","sum(_col6)","count(_col6)"],keys:_col12 - Top N Key Operator [TNK_54] (rows=4635977 width=100) + Top N Key Operator [TNK_57] (rows=4635977 width=100) keys:_col12,top n:100 - Merge Join Operator [MERGEJOIN_98] (rows=4635977 width=100) - 
Conds:RS_23._col1=RS_117._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col12"] + Merge Join Operator [MERGEJOIN_103] (rows=4635977 width=100) + Conds:RS_23._col1=RS_122._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col12"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_117] + SHUFFLE [RS_122] PartitionCols:_col0 - Select Operator [SEL_116] (rows=462000 width=104) + Select Operator [SEL_121] (rows=462000 width=104) Output:["_col0","_col1"] TableScan [TS_12] (rows=462000 width=104) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_23] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_97] (rows=4635977 width=4) - Conds:RS_20._col3=RS_115._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_102] (rows=4635977 width=4) + Conds:RS_20._col3=RS_120._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7"] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_115] + SHUFFLE [RS_120] PartitionCols:_col0 - Select Operator [SEL_114] (rows=2300 width=4) + Select Operator [SEL_119] (rows=2300 width=4) Output:["_col0"] - Filter Operator [FIL_113] (rows=2300 width=174) + Filter Operator [FIL_118] (rows=2300 width=174) predicate:((p_channel_email = 'N') or (p_channel_event = 'N')) TableScan [TS_9] (rows=2300 width=174) default@promotion,promotion,Tbl:COMPLETE,Col:COMPLETE,Output:["p_promo_sk","p_channel_email","p_channel_event"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_20] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_96] (rows=4635977 width=4) - Conds:RS_17._col0=RS_112._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_101] (rows=4635977 width=4) + Conds:RS_17._col0=RS_117._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_112] + SHUFFLE [RS_117] PartitionCols:_col0 - Select Operator [SEL_111] (rows=652 width=4) + Select Operator 
[SEL_116] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_110] (rows=652 width=8) + Filter Operator [FIL_115] (rows=652 width=8) predicate:(d_year = 1998) TableScan [TS_6] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_17] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_95] (rows=4635977 width=4) - Conds:RS_109._col2=RS_101._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_100] (rows=4635977 width=4) + Conds:RS_114._col2=RS_106._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7"] <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_101] + PARTITION_ONLY_SHUFFLE [RS_106] PartitionCols:_col0 - Select Operator [SEL_100] (rows=14776 width=4) + Select Operator [SEL_105] (rows=14776 width=4) Output:["_col0"] - Filter Operator [FIL_99] (rows=14776 width=268) + Filter Operator [FIL_104] (rows=14776 width=268) predicate:((cd_marital_status = 'W') and (cd_education_status = 'Primary') and (cd_gender = 'F')) TableScan [TS_3] (rows=1861800 width=268) default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_109] + SHUFFLE [RS_114] PartitionCols:_col2 - Select Operator [SEL_108] (rows=501686735 width=340) + Select Operator [SEL_113] (rows=501686735 width=340) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_107] (rows=501686735 width=340) + Filter Operator [FIL_112] (rows=501686735 width=340) predicate:(ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_promo_sk is not null and ss_cdemo_sk BETWEEN DynamicValue(RS_15_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_15_customer_demographics_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, 
DynamicValue(RS_15_customer_demographics_cd_demo_sk_bloom_filter))) TableScan [TS_0] (rows=575995635 width=340) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_cdemo_sk","ss_promo_sk","ss_quantity","ss_list_price","ss_sales_price","ss_coupon_amt"] <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_106] - Group By Operator [GBY_105] (rows=1 width=12) + BROADCAST [RS_111] + Group By Operator [GBY_110] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_104] - Group By Operator [GBY_103] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_109] + Group By Operator [GBY_108] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_102] (rows=14776 width=4) + Select Operator [SEL_107] (rows=14776 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_100] + Please refer to the previous Select Operator [SEL_105] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query76.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query76.q.out index ce4f7cb061..24c268f4ab 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query76.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query76.q.out @@ -73,124 +73,124 @@ Stage-0 limit:100 Stage-1 Reducer 6 vectorized - File Output Operator [FS_175] - Limit [LIM_174] (rows=100 width=408) + File Output Operator [FS_180] + Limit [LIM_179] (rows=100 width=408) Number of rows:100 - Select Operator [SEL_173] (rows=5600 width=408) + Select Operator [SEL_178] (rows=5600 width=408) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_172] - Group By Operator [GBY_171] (rows=5600 
width=408) + SHUFFLE [RS_177] + Group By Operator [GBY_176] (rows=5600 width=408) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 <-Union 4 [SIMPLE_EDGE] <-Reducer 10 [CONTAINS] - Reduce Output Operator [RS_161] + Reduce Output Operator [RS_166] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_160] (rows=224000 width=408) + Group By Operator [GBY_165] (rows=224000 width=408) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["count()","sum(_col5)"],keys:_col0, _col1, _col2, _col3, _col4 - Top N Key Operator [TNK_159] (rows=26219002 width=388) + Top N Key Operator [TNK_164] (rows=26219002 width=388) keys:_col0, _col1, _col2, _col3, _col4,top n:100 - Select Operator [SEL_157] (rows=1433911 width=399) + Select Operator [SEL_162] (rows=1433911 width=399) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_156] (rows=1433911 width=209) - Conds:RS_39._col1=RS_185._col0(Inner),Output:["_col2","_col4","_col5","_col7"] + Merge Join Operator [MERGEJOIN_161] (rows=1433911 width=209) + Conds:RS_39._col1=RS_190._col0(Inner),Output:["_col2","_col4","_col5","_col7"] <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_185] + SHUFFLE [RS_190] PartitionCols:_col0 - Select Operator [SEL_184] (rows=462000 width=94) + Select Operator [SEL_189] (rows=462000 width=94) Output:["_col0","_col1"] TableScan [TS_34] (rows=462000 width=94) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_category"] <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_39] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_142] (rows=1433911 width=123) - Conds:RS_183._col0=RS_165._col0(Inner),Output:["_col1","_col2","_col4","_col5"] + Merge Join Operator [MERGEJOIN_147] (rows=1433911 width=123) + Conds:RS_188._col0=RS_170._col0(Inner),Output:["_col1","_col2","_col4","_col5"] <-Map 1 
[SIMPLE_EDGE] vectorized - SHUFFLE [RS_165] + SHUFFLE [RS_170] PartitionCols:_col0 - Select Operator [SEL_162] (rows=73049 width=12) + Select Operator [SEL_167] (rows=73049 width=12) Output:["_col0","_col1","_col2"] TableScan [TS_0] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_183] + SHUFFLE [RS_188] PartitionCols:_col0 - Select Operator [SEL_182] (rows=1433911 width=119) + Select Operator [SEL_187] (rows=1433911 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_181] (rows=1433911 width=123) + Filter Operator [FIL_186] (rows=1433911 width=123) predicate:(cs_warehouse_sk is null and cs_sold_date_sk is not null) TableScan [TS_29] (rows=287989836 width=123) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_warehouse_sk","cs_item_sk","cs_ext_sales_price"] <-Reducer 3 [CONTAINS] - Reduce Output Operator [RS_149] + Reduce Output Operator [RS_154] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_148] (rows=224000 width=408) + Group By Operator [GBY_153] (rows=224000 width=408) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["count()","sum(_col5)"],keys:_col0, _col1, _col2, _col3, _col4 - Top N Key Operator [TNK_147] (rows=26219002 width=388) + Top N Key Operator [TNK_152] (rows=26219002 width=388) keys:_col0, _col1, _col2, _col3, _col4,top n:100 - Select Operator [SEL_145] (rows=24749363 width=387) + Select Operator [SEL_150] (rows=24749363 width=387) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_144] (rows=24749363 width=204) - Conds:RS_10._col4=RS_170._col0(Inner),Output:["_col1","_col2","_col5","_col7"] + Merge Join Operator [MERGEJOIN_149] (rows=24749363 width=204) + Conds:RS_10._col4=RS_175._col0(Inner),Output:["_col1","_col2","_col5","_col7"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_170] + 
SHUFFLE [RS_175] PartitionCols:_col0 - Select Operator [SEL_169] (rows=462000 width=94) + Select Operator [SEL_174] (rows=462000 width=94) Output:["_col0","_col1"] TableScan [TS_5] (rows=462000 width=94) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_category"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_10] PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_138] (rows=24749363 width=118) - Conds:RS_163._col0=RS_168._col0(Inner),Output:["_col1","_col2","_col4","_col5"] + Merge Join Operator [MERGEJOIN_143] (rows=24749363 width=118) + Conds:RS_168._col0=RS_173._col0(Inner),Output:["_col1","_col2","_col4","_col5"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_163] + SHUFFLE [RS_168] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_162] + Please refer to the previous Select Operator [SEL_167] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_168] + SHUFFLE [RS_173] PartitionCols:_col0 - Select Operator [SEL_167] (rows=24749363 width=114) + Select Operator [SEL_172] (rows=24749363 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_166] (rows=24749363 width=118) + Filter Operator [FIL_171] (rows=24749363 width=118) predicate:(ss_addr_sk is null and ss_sold_date_sk is not null) TableScan [TS_2] (rows=575995635 width=118) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_addr_sk","ss_ext_sales_price"] <-Reducer 8 [CONTAINS] - Reduce Output Operator [RS_155] + Reduce Output Operator [RS_160] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_154] (rows=224000 width=408) + Group By Operator [GBY_159] (rows=224000 width=408) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["count()","sum(_col5)"],keys:_col0, _col1, _col2, _col3, _col4 - Top N Key Operator [TNK_153] (rows=26219002 width=388) + Top N Key Operator [TNK_158] (rows=26219002 width=388) keys:_col0, _col1, _col2, _col3, _col4,top n:100 - Select Operator 
[SEL_151] (rows=35728 width=394) + Select Operator [SEL_156] (rows=35728 width=394) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_150] (rows=35728 width=209) - Conds:RS_24._col1=RS_180._col0(Inner),Output:["_col2","_col4","_col5","_col7"] + Merge Join Operator [MERGEJOIN_155] (rows=35728 width=209) + Conds:RS_24._col1=RS_185._col0(Inner),Output:["_col2","_col4","_col5","_col7"] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_180] + SHUFFLE [RS_185] PartitionCols:_col0 - Select Operator [SEL_179] (rows=462000 width=94) + Select Operator [SEL_184] (rows=462000 width=94) Output:["_col0","_col1"] TableScan [TS_19] (rows=462000 width=94) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_category"] <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_140] (rows=35728 width=123) - Conds:RS_178._col0=RS_164._col0(Inner),Output:["_col1","_col2","_col4","_col5"] + Merge Join Operator [MERGEJOIN_145] (rows=35728 width=123) + Conds:RS_183._col0=RS_169._col0(Inner),Output:["_col1","_col2","_col4","_col5"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_164] + SHUFFLE [RS_169] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_162] + Please refer to the previous Select Operator [SEL_167] <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_178] + SHUFFLE [RS_183] PartitionCols:_col0 - Select Operator [SEL_177] (rows=35728 width=119) + Select Operator [SEL_182] (rows=35728 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_176] (rows=35728 width=123) + Filter Operator [FIL_181] (rows=35728 width=123) predicate:(ws_web_page_sk is null and ws_sold_date_sk is not null) TableScan [TS_14] (rows=144002668 width=123) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_web_page_sk","ws_ext_sales_price"] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query77.q.out 
ql/src/test/results/clientpositive/perf/tez/constraints/query77.q.out index 95ab61bed2..c2758b7033 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query77.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query77.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[225][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 14' is a cross product +Warning: Shuffle Join MERGEJOIN[227][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 14' is a cross product PREHOOK: query: explain with ss as (select s_store_sk, @@ -263,107 +263,107 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_271] - Limit [LIM_270] (rows=100 width=438) + File Output Operator [FS_273] + Limit [LIM_272] (rows=100 width=438) Number of rows:100 - Select Operator [SEL_269] (rows=564 width=438) + Select Operator [SEL_271] (rows=564 width=438) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_268] - Select Operator [SEL_267] (rows=564 width=438) + SHUFFLE [RS_270] + Select Operator [SEL_269] (rows=564 width=438) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_266] (rows=564 width=446) + Group By Operator [GBY_268] (rows=564 width=446) Output:["_col0","_col1","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Union 5 [SIMPLE_EDGE] <-Reducer 14 [CONTAINS] - Reduce Output Operator [RS_230] + Reduce Output Operator [RS_232] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_229] (rows=564 width=446) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Top N Key Operator [TNK_228] (rows=531 width=435) - keys:_col0, _col1, 0L,top n:100 - Select Operator [SEL_226] (rows=10 width=439) + Top N Key Operator [TNK_231] (rows=564 width=446) + keys:_col0, _col1,top n:100 + Group By Operator [GBY_230] (rows=564 
width=446) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L + Select Operator [SEL_228] (rows=10 width=439) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_225] (rows=10 width=452) + Merge Join Operator [MERGEJOIN_227] (rows=10 width=452) Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 13 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_278] - Group By Operator [GBY_277] (rows=10 width=228) + PARTITION_ONLY_SHUFFLE [RS_280] + Group By Operator [GBY_279] (rows=10 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_45] PartitionCols:_col0 Group By Operator [GBY_44] (rows=2550 width=227) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","sum(_col3)"],keys:_col1 - Merge Join Operator [MERGEJOIN_212] (rows=286549727 width=227) - Conds:RS_276._col0=RS_242._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_214] (rows=286549727 width=227) + Conds:RS_278._col0=RS_244._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_242] + PARTITION_ONLY_SHUFFLE [RS_244] PartitionCols:_col0 - Select Operator [SEL_238] (rows=8116 width=4) + Select Operator [SEL_240] (rows=8116 width=4) Output:["_col0"] - Filter Operator [FIL_237] (rows=8116 width=98) + Filter Operator [FIL_239] (rows=8116 width=98) predicate:CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-08-04 00:00:00' AND TIMESTAMP'1998-09-03 00:00:00' TableScan [TS_3] (rows=73049 width=98) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_276] + SHUFFLE [RS_278] PartitionCols:_col0 - Select Operator [SEL_275] (rows=286549727 width=231) + Select Operator [SEL_277] (rows=286549727 width=231) 
Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_274] (rows=286549727 width=231) + Filter Operator [FIL_276] (rows=286549727 width=231) predicate:(cs_sold_date_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_41_date_dim_d_date_sk_min) AND DynamicValue(RS_41_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_41_date_dim_d_date_sk_bloom_filter))) TableScan [TS_34] (rows=287989836 width=231) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_call_center_sk","cs_ext_sales_price","cs_net_profit"] <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_273] - Group By Operator [GBY_272] (rows=1 width=12) + BROADCAST [RS_275] + Group By Operator [GBY_274] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_252] - Group By Operator [GBY_249] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_254] + Group By Operator [GBY_251] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_243] (rows=8116 width=4) + Select Operator [SEL_245] (rows=8116 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_238] + Please refer to the previous Select Operator [SEL_240] <-Reducer 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_283] - Group By Operator [GBY_282] (rows=1 width=224) + PARTITION_ONLY_SHUFFLE [RS_285] + Group By Operator [GBY_284] (rows=1 width=224) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"] <-Reducer 16 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_59] Group By Operator [GBY_58] (rows=1 width=224) Output:["_col0","_col1"],aggregations:["sum(_col1)","sum(_col2)"] - Merge Join Operator [MERGEJOIN_213] (rows=3199657 width=183) - 
Conds:RS_281._col0=RS_244._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_215] (rows=3199657 width=183) + Conds:RS_283._col0=RS_246._col0(Inner),Output:["_col1","_col2"] <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_244] + PARTITION_ONLY_SHUFFLE [RS_246] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_238] + Please refer to the previous Select Operator [SEL_240] <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_281] + SHUFFLE [RS_283] PartitionCols:_col0 - Select Operator [SEL_280] (rows=28798881 width=223) + Select Operator [SEL_282] (rows=28798881 width=223) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_279] (rows=28798881 width=223) + Filter Operator [FIL_281] (rows=28798881 width=223) predicate:cr_returned_date_sk is not null TableScan [TS_48] (rows=28798881 width=223) default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_returned_date_sk","cr_return_amount","cr_net_loss"] <-Reducer 20 [CONTAINS] - Reduce Output Operator [RS_236] + Reduce Output Operator [RS_238] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_235] (rows=564 width=446) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Top N Key Operator [TNK_234] (rows=531 width=435) - keys:_col0, _col1, 0L,top n:100 - Select Operator [SEL_232] (rows=394 width=435) + Top N Key Operator [TNK_237] (rows=564 width=446) + keys:_col0, _col1,top n:100 + Group By Operator [GBY_236] (rows=564 width=446) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L + Select Operator [SEL_234] (rows=394 width=435) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_231] (rows=394 width=335) - Conds:RS_290._col0=RS_295._col0(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5"] + Merge Join Operator [MERGEJOIN_233] (rows=394 
width=335) + Conds:RS_292._col0=RS_297._col0(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5"] <-Reducer 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_290] + SHUFFLE [RS_292] PartitionCols:_col0 - Group By Operator [GBY_289] (rows=206 width=228) + Group By Operator [GBY_291] (rows=206 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 <-Reducer 18 [SIMPLE_EDGE] SHUFFLE [RS_79] @@ -372,36 +372,36 @@ Stage-0 Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","sum(_col2)"],keys:_col0 Select Operator [SEL_76] (rows=143931136 width=227) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_214] (rows=143931136 width=227) - Conds:RS_288._col0=RS_245._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_216] (rows=143931136 width=227) + Conds:RS_290._col0=RS_247._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_245] + PARTITION_ONLY_SHUFFLE [RS_247] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_238] + Please refer to the previous Select Operator [SEL_240] <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_288] + SHUFFLE [RS_290] PartitionCols:_col0 - Select Operator [SEL_287] (rows=143931136 width=231) + Select Operator [SEL_289] (rows=143931136 width=231) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_286] (rows=143931136 width=231) + Filter Operator [FIL_288] (rows=143931136 width=231) predicate:(ws_sold_date_sk is not null and ws_web_page_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_74_date_dim_d_date_sk_min) AND DynamicValue(RS_74_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_74_date_dim_d_date_sk_bloom_filter))) TableScan [TS_67] (rows=144002668 width=231) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_web_page_sk","ws_ext_sales_price","ws_net_profit"] <-Reducer 21 
[BROADCAST_EDGE] vectorized - BROADCAST [RS_285] - Group By Operator [GBY_284] (rows=1 width=12) + BROADCAST [RS_287] + Group By Operator [GBY_286] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_253] - Group By Operator [GBY_250] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_255] + Group By Operator [GBY_252] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_246] (rows=8116 width=4) + Select Operator [SEL_248] (rows=8116 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_238] + Please refer to the previous Select Operator [SEL_240] <-Reducer 23 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_295] + SHUFFLE [RS_297] PartitionCols:_col0 - Group By Operator [GBY_294] (rows=188 width=227) + Group By Operator [GBY_296] (rows=188 width=227) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 <-Reducer 22 [SIMPLE_EDGE] SHUFFLE [RS_94] @@ -410,36 +410,36 @@ Stage-0 Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","sum(_col2)"],keys:_col0 Select Operator [SEL_91] (rows=13129719 width=217) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_215] (rows=13129719 width=217) - Conds:RS_293._col0=RS_247._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_217] (rows=13129719 width=217) + Conds:RS_295._col0=RS_249._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_247] + PARTITION_ONLY_SHUFFLE [RS_249] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_238] + Please refer to the previous Select Operator [SEL_240] <-Map 28 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_293] + SHUFFLE [RS_295] 
PartitionCols:_col0 - Select Operator [SEL_292] (rows=13129719 width=221) + Select Operator [SEL_294] (rows=13129719 width=221) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_291] (rows=13129719 width=221) + Filter Operator [FIL_293] (rows=13129719 width=221) predicate:(wr_web_page_sk is not null and wr_returned_date_sk is not null) TableScan [TS_82] (rows=14398467 width=221) default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_returned_date_sk","wr_web_page_sk","wr_return_amt","wr_net_loss"] <-Reducer 4 [CONTAINS] - Reduce Output Operator [RS_224] + Reduce Output Operator [RS_226] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_223] (rows=564 width=446) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Top N Key Operator [TNK_222] (rows=531 width=435) - keys:_col0, _col1, 0L,top n:100 - Select Operator [SEL_220] (rows=127 width=436) + Top N Key Operator [TNK_225] (rows=564 width=446) + keys:_col0, _col1,top n:100 + Group By Operator [GBY_224] (rows=564 width=446) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L + Select Operator [SEL_222] (rows=127 width=436) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_219] (rows=127 width=379) - Conds:RS_260._col0=RS_265._col0(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5"] + Merge Join Operator [MERGEJOIN_221] (rows=127 width=379) + Conds:RS_262._col0=RS_267._col0(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5"] <-Reducer 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_265] + SHUFFLE [RS_267] PartitionCols:_col0 - Group By Operator [GBY_264] (rows=85 width=227) + Group By Operator [GBY_266] (rows=85 width=227) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_27] @@ 
-448,25 +448,25 @@ Stage-0 Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","sum(_col2)"],keys:_col0 Select Operator [SEL_24] (rows=53634860 width=220) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_211] (rows=53634860 width=220) - Conds:RS_263._col0=RS_241._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_213] (rows=53634860 width=220) + Conds:RS_265._col0=RS_243._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_241] + PARTITION_ONLY_SHUFFLE [RS_243] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_238] + Please refer to the previous Select Operator [SEL_240] <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_263] + SHUFFLE [RS_265] PartitionCols:_col0 - Select Operator [SEL_262] (rows=53634860 width=223) + Select Operator [SEL_264] (rows=53634860 width=223) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_261] (rows=53634860 width=223) + Filter Operator [FIL_263] (rows=53634860 width=223) predicate:(sr_store_sk is not null and sr_returned_date_sk is not null) TableScan [TS_15] (rows=57591150 width=223) default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_store_sk","sr_return_amt","sr_net_loss"] <-Reducer 3 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_260] + SHUFFLE [RS_262] PartitionCols:_col0 - Group By Operator [GBY_259] (rows=84 width=227) + Group By Operator [GBY_261] (rows=84 width=227) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_12] @@ -475,30 +475,30 @@ Stage-0 Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","sum(_col2)"],keys:_col0 Select Operator [SEL_9] (rows=525329897 width=217) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_210] (rows=525329897 width=217) - Conds:RS_258._col0=RS_239._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join 
Operator [MERGEJOIN_212] (rows=525329897 width=217) + Conds:RS_260._col0=RS_241._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_239] + PARTITION_ONLY_SHUFFLE [RS_241] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_238] + Please refer to the previous Select Operator [SEL_240] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_258] + SHUFFLE [RS_260] PartitionCols:_col0 - Select Operator [SEL_257] (rows=525329897 width=221) + Select Operator [SEL_259] (rows=525329897 width=221) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_256] (rows=525329897 width=221) + Filter Operator [FIL_258] (rows=525329897 width=221) predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_7_date_dim_d_date_sk_min) AND DynamicValue(RS_7_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_7_date_dim_d_date_sk_bloom_filter))) TableScan [TS_0] (rows=575995635 width=221) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_ext_sales_price","ss_net_profit"] <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_255] - Group By Operator [GBY_254] (rows=1 width=12) + BROADCAST [RS_257] + Group By Operator [GBY_256] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_251] - Group By Operator [GBY_248] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_253] + Group By Operator [GBY_250] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_240] (rows=8116 width=4) + Select Operator [SEL_242] (rows=8116 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_238] + Please refer to the 
previous Select Operator [SEL_240] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query8.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query8.q.out index 170bccf406..ed1501bf59 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query8.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query8.q.out @@ -245,141 +245,141 @@ Stage-0 limit:100 Stage-1 Reducer 5 vectorized - File Output Operator [FS_149] - Limit [LIM_148] (rows=1 width=200) + File Output Operator [FS_154] + Limit [LIM_153] (rows=1 width=200) Number of rows:100 - Select Operator [SEL_147] (rows=1 width=200) + Select Operator [SEL_152] (rows=1 width=200) Output:["_col0","_col1"] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_146] - Group By Operator [GBY_145] (rows=1 width=200) + SHUFFLE [RS_151] + Group By Operator [GBY_150] (rows=1 width=200) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_57] PartitionCols:_col0 Group By Operator [GBY_56] (rows=401 width=200) Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col6 - Top N Key Operator [TNK_84] (rows=525329897 width=194) + Top N Key Operator [TNK_87] (rows=525329897 width=194) keys:_col6,top n:100 - Merge Join Operator [MERGEJOIN_118] (rows=525329897 width=194) + Merge Join Operator [MERGEJOIN_123] (rows=525329897 width=194) Conds:RS_52._col1=RS_53._col1(Inner),Output:["_col2","_col6"] <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_53] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_117] (rows=1 width=92) - Conds:RS_141._col0=RS_144._col2(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_122] (rows=1 width=92) + Conds:RS_146._col0=RS_149._col2(Inner),Output:["_col1","_col2"] <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_144] + SHUFFLE [RS_149] PartitionCols:_col2 - Select Operator [SEL_143] (rows=1704 width=276) + Select Operator [SEL_148] (rows=1704 width=276) Output:["_col0","_col1","_col2"] - 
Filter Operator [FIL_142] (rows=1704 width=181) + Filter Operator [FIL_147] (rows=1704 width=181) predicate:substr(s_zip, 1, 2) is not null TableScan [TS_42] (rows=1704 width=181) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_zip"] <-Reducer 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_141] + SHUFFLE [RS_146] PartitionCols:_col0 - Select Operator [SEL_140] (rows=1 width=184) + Select Operator [SEL_145] (rows=1 width=184) Output:["_col0"] - Filter Operator [FIL_139] (rows=1 width=192) + Filter Operator [FIL_144] (rows=1 width=192) predicate:(_col1 = 2L) - Group By Operator [GBY_138] (rows=5633 width=192) + Group By Operator [GBY_143] (rows=5633 width=192) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Union 10 [SIMPLE_EDGE] <-Reducer 16 [CONTAINS] vectorized - Reduce Output Operator [RS_170] + Reduce Output Operator [RS_175] PartitionCols:_col0 - Group By Operator [GBY_169] (rows=5633 width=192) + Group By Operator [GBY_174] (rows=5633 width=192) Output:["_col0","_col1"],aggregations:["count(_col1)"],keys:_col0 - Group By Operator [GBY_168] (rows=1126 width=192) + Group By Operator [GBY_173] (rows=1126 width=192) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Reducer 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_167] + SHUFFLE [RS_172] PartitionCols:_col0 - Group By Operator [GBY_166] (rows=1126 width=192) + Group By Operator [GBY_171] (rows=1126 width=192) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Select Operator [SEL_165] (rows=2253 width=97) + Select Operator [SEL_170] (rows=2253 width=97) Output:["_col0"] - Filter Operator [FIL_164] (rows=2253 width=97) + Filter Operator [FIL_169] (rows=2253 width=97) predicate:(_col1 > 10L) - Group By Operator [GBY_163] (rows=6761 width=97) + Group By Operator [GBY_168] (rows=6761 width=97) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_25] 
PartitionCols:_col0 Group By Operator [GBY_24] (rows=67610 width=97) Output:["_col0","_col1"],aggregations:["count()"],keys:_col1 - Merge Join Operator [MERGEJOIN_116] (rows=26666667 width=89) - Conds:RS_159._col0=RS_162._col0(Inner),Output:["_col1"] + Merge Join Operator [MERGEJOIN_121] (rows=26666667 width=89) + Conds:RS_164._col0=RS_167._col0(Inner),Output:["_col1"] <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_159] + SHUFFLE [RS_164] PartitionCols:_col0 - Select Operator [SEL_158] (rows=40000000 width=93) + Select Operator [SEL_163] (rows=40000000 width=93) Output:["_col0","_col1"] - Filter Operator [FIL_157] (rows=40000000 width=93) + Filter Operator [FIL_162] (rows=40000000 width=93) predicate:substr(substr(ca_zip, 1, 5), 1, 2) is not null TableScan [TS_14] (rows=40000000 width=93) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_zip"] <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_162] + SHUFFLE [RS_167] PartitionCols:_col0 - Select Operator [SEL_161] (rows=26666667 width=4) + Select Operator [SEL_166] (rows=26666667 width=4) Output:["_col0"] - Filter Operator [FIL_160] (rows=26666667 width=89) + Filter Operator [FIL_165] (rows=26666667 width=89) predicate:((c_preferred_cust_flag = 'Y') and c_current_addr_sk is not null) TableScan [TS_17] (rows=80000000 width=89) default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_current_addr_sk","c_preferred_cust_flag"] <-Reducer 9 [CONTAINS] vectorized - Reduce Output Operator [RS_156] + Reduce Output Operator [RS_161] PartitionCols:_col0 - Group By Operator [GBY_155] (rows=5633 width=192) + Group By Operator [GBY_160] (rows=5633 width=192) Output:["_col0","_col1"],aggregations:["count(_col1)"],keys:_col0 - Group By Operator [GBY_154] (rows=10141 width=192) + Group By Operator [GBY_159] (rows=10141 width=192) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_153] + SHUFFLE [RS_158] 
PartitionCols:_col0 - Group By Operator [GBY_152] (rows=141974 width=192) + Group By Operator [GBY_157] (rows=141974 width=192) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Select Operator [SEL_151] (rows=20000000 width=89) + Select Operator [SEL_156] (rows=20000000 width=89) Output:["_col0"] - Filter Operator [FIL_150] (rows=20000000 width=89) + Filter Operator [FIL_155] (rows=20000000 width=89) predicate:((substr(ca_zip, 1, 5)) IN ('89436', '30868', '65085', '22977', '83927', '77557', '58429', '40697', '80614', '10502', '32779', '91137', '61265', '98294', '17921', '18427', '21203', '59362', '87291', '84093', '21505', '17184', '10866', '67898', '25797', '28055', '18377', '80332', '74535', '21757', '29742', '90885', '29898', '17819', '40811', '25990', '47513', '89531', '91068', '10391', '18846', '99223', '82637', '41368', '83658', '86199', '81625', '26696', '89338', '88425', '32200', '81427', '19053', '77471', '36610', '99823', '43276', '41249', '48584', '83550', '82276', '18842', '78890', '14090', '38123', '40936', '34425', '19850', '43286', '80072', '79188', '54191', '11395', '50497', '84861', '90733', '21068', '57666', '37119', '25004', '57835', '70067', '62878', '95806', '19303', '18840', '19124', '29785', '16737', '16022', '49613', '89977', '68310', '60069', '98360', '48649', '39050', '41793', '25002', '27413', '39736', '47208', '16515', '94808', '57648', '15009', '80015', '42961', '63982', '21744', '71853', '81087', '67468', '34175', '64008', '20261', '11201', '51799', '48043', '45645', '61163', '48375', '36447', '57042', '21218', '41100', '89951', '22745', '35851', '83326', '61125', '78298', '80752', '49858', '52940', '96976', '63792', '11376', '53582', '18717', '90226', '50530', '94203', '99447', '27670', '96577', '57856', '56372', '16165', '23427', '54561', '28806', '44439', '22926', '30123', '61451', '92397', '56979', '92309', '70873', '13355', '21801', '46346', '37562', '56458', '28286', '47306', '99555', '69399', '26234', '47546', 
'49661', '88601', '35943', '39936', '25632', '24611', '44166', '56648', '30379', '59785', '11110', '14329', '93815', '52226', '71381', '13842', '25612', '63294', '14664', '21077', '82626', '18799', '60915', '81020', '56447', '76619', '11433', '13414', '42548', '92713', '70467', '30884', '47484', '16072', '38936', '13036', '88376', '45539', '35901', '19506', '65690', '73957', '71850', '49231', '14276', '20005', '18384', '76615', '11635', '38177', '55607', '41369', '95447', '58581', '58149', '91946', '33790', '76232', '75692', '95464', '22246', '51061', '56692', '53121', '77209', '15482', '10688', '14868', '45907', '73520', '72666', '25734', '17959', '24677', '66446', '94627', '53535', '15560', '41967', '69297', '11929', '59403', '33283', '52232', '57350', '43933', '40921', '36635', '10827', '71286', '19736', '80619', '25251', '95042', '15526', '36496', '55854', '49124', '81980', '35375', '49157', '63512', '28944', '14946', '36503', '54010', '18767', '23969', '43905', '66979', '33113', '21286', '58471', '59080', '13395', '79144', '70373', '67031', '38360', '26705', '50906', '52406', '26066', '73146', '15884', '31897', '30045', '61068', '45550', '92454', '13376', '14354', '19770', '22928', '97790', '50723', '46081', '30202', '14410', '20223', '88500', '67298', '13261', '14172', '81410', '93578', '83583', '46047', '94167', '82564', '21156', '15799', '86709', '37931', '74703', '83103', '23054', '70470', '72008', '49247', '91911', '69998', '20961', '70070', '63197', '54853', '88191', '91830', '49521', '19454', '81450', '89091', '62378', '25683', '61869', '51744', '36580', '85778', '36871', '48121', '28810', '83712', '45486', '67393', '26935', '42393', '20132', '55349', '86057', '21309', '80218', '10094', '11357', '48819', '39734', '40758', '30432', '21204', '29467', '30214', '61024', '55307', '74621', '11622', '68908', '33032', '52868', '99194', '99900', '84936', '69036', '99149', '45013', '32895', '59004', '32322', '14933', '32936', '33562', '72550', '27385', '58049', 
'58200', '16808', '21360', '32961', '18586', '79307', '15492') and substr(substr(ca_zip, 1, 5), 1, 2) is not null) TableScan [TS_6] (rows=40000000 width=89) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_zip"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_52] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_115] (rows=525329897 width=110) - Conds:RS_137._col0=RS_129._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_120] (rows=525329897 width=110) + Conds:RS_142._col0=RS_134._col0(Inner),Output:["_col1","_col2"] <-Map 6 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_129] + PARTITION_ONLY_SHUFFLE [RS_134] PartitionCols:_col0 - Select Operator [SEL_128] (rows=130 width=4) + Select Operator [SEL_133] (rows=130 width=4) Output:["_col0"] - Filter Operator [FIL_127] (rows=130 width=12) + Filter Operator [FIL_132] (rows=130 width=12) predicate:((d_year = 2002) and (d_qoy = 1)) TableScan [TS_3] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_137] + SHUFFLE [RS_142] PartitionCols:_col0 - Select Operator [SEL_136] (rows=525329897 width=114) + Select Operator [SEL_141] (rows=525329897 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_135] (rows=525329897 width=114) + Filter Operator [FIL_140] (rows=525329897 width=114) predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_50_date_dim_d_date_sk_min) AND DynamicValue(RS_50_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_50_date_dim_d_date_sk_bloom_filter))) TableScan [TS_0] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_net_profit"] <-Reducer 7 [BROADCAST_EDGE] vectorized - BROADCAST [RS_134] - Group By Operator [GBY_133] (rows=1 width=12) + BROADCAST [RS_139] + Group By Operator 
[GBY_138] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_132] - Group By Operator [GBY_131] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_137] + Group By Operator [GBY_136] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_130] (rows=130 width=4) + Select Operator [SEL_135] (rows=130 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_128] + Please refer to the previous Select Operator [SEL_133] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query80.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query80.q.out index b18f89373c..72a54928c2 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query80.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query80.q.out @@ -249,28 +249,28 @@ Stage-0 limit:100 Stage-1 Reducer 10 vectorized - File Output Operator [FS_430] - Limit [LIM_429] (rows=100 width=619) + File Output Operator [FS_432] + Limit [LIM_431] (rows=100 width=619) Number of rows:100 - Select Operator [SEL_428] (rows=59581 width=619) + Select Operator [SEL_430] (rows=59581 width=619) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_427] - Select Operator [SEL_426] (rows=59581 width=619) + SHUFFLE [RS_429] + Select Operator [SEL_428] (rows=59581 width=619) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_425] (rows=59581 width=627) + Group By Operator [GBY_427] (rows=59581 width=627) Output:["_col0","_col1","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Union 8 [SIMPLE_EDGE] <-Reducer 18 [CONTAINS] vectorized - Reduce Output 
Operator [RS_444] + Reduce Output Operator [RS_446] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_443] (rows=59581 width=627) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Top N Key Operator [TNK_442] (rows=39721 width=618) - keys:_col0, _col1, 0L,top n:100 - Select Operator [SEL_441] (rows=38846 width=619) + Top N Key Operator [TNK_445] (rows=59581 width=627) + keys:_col0, _col1,top n:100 + Group By Operator [GBY_444] (rows=59581 width=627) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L + Select Operator [SEL_443] (rows=38846 width=619) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_440] (rows=38846 width=436) + Group By Operator [GBY_442] (rows=38846 width=436) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0 <-Reducer 17 [SIMPLE_EDGE] SHUFFLE [RS_71] @@ -279,99 +279,99 @@ Stage-0 Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col1)","sum(_col2)","sum(_col3)"],keys:_col0 Select Operator [SEL_68] (rows=154681759 width=322) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_363] (rows=154681759 width=322) - Conds:RS_65._col1=RS_439._col0(Inner),Output:["_col5","_col6","_col9","_col10","_col15"] + Merge Join Operator [MERGEJOIN_365] (rows=154681759 width=322) + Conds:RS_65._col1=RS_441._col0(Inner),Output:["_col5","_col6","_col9","_col10","_col15"] <-Map 32 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_439] + SHUFFLE [RS_441] PartitionCols:_col0 - Select Operator [SEL_438] (rows=46000 width=104) + Select Operator [SEL_440] (rows=46000 width=104) Output:["_col0","_col1"] TableScan [TS_51] (rows=46000 width=104) default@catalog_page,catalog_page,Tbl:COMPLETE,Col:COMPLETE,Output:["cp_catalog_page_sk","cp_catalog_page_id"] <-Reducer 16 
[SIMPLE_EDGE] SHUFFLE [RS_65] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_362] (rows=154681759 width=226) - Conds:RS_62._col3=RS_416._col0(Inner),Output:["_col1","_col5","_col6","_col9","_col10"] + Merge Join Operator [MERGEJOIN_364] (rows=154681759 width=226) + Conds:RS_62._col3=RS_418._col0(Inner),Output:["_col1","_col5","_col6","_col9","_col10"] <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_416] + SHUFFLE [RS_418] PartitionCols:_col0 - Select Operator [SEL_414] (rows=1150 width=4) + Select Operator [SEL_416] (rows=1150 width=4) Output:["_col0"] - Filter Operator [FIL_413] (rows=1150 width=89) + Filter Operator [FIL_415] (rows=1150 width=89) predicate:(p_channel_tv = 'N') TableScan [TS_11] (rows=2300 width=89) default@promotion,promotion,Tbl:COMPLETE,Col:COMPLETE,Output:["p_promo_sk","p_channel_tv"] <-Reducer 15 [SIMPLE_EDGE] SHUFFLE [RS_62] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_361] (rows=154681759 width=230) - Conds:RS_59._col2=RS_411._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col9","_col10"] + Merge Join Operator [MERGEJOIN_363] (rows=154681759 width=230) + Conds:RS_59._col2=RS_413._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col9","_col10"] <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_411] + SHUFFLE [RS_413] PartitionCols:_col0 - Select Operator [SEL_409] (rows=154000 width=4) + Select Operator [SEL_411] (rows=154000 width=4) Output:["_col0"] - Filter Operator [FIL_408] (rows=154000 width=115) + Filter Operator [FIL_410] (rows=154000 width=115) predicate:(i_current_price > 50) TableScan [TS_8] (rows=462000 width=115) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_current_price"] <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_59] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_360] (rows=464045263 width=322) - Conds:RS_56._col0=RS_391._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col9","_col10"] + Merge Join Operator [MERGEJOIN_362] (rows=464045263 width=322) + 
Conds:RS_56._col0=RS_393._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col9","_col10"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_391] + SHUFFLE [RS_393] PartitionCols:_col0 - Select Operator [SEL_388] (rows=8116 width=4) + Select Operator [SEL_390] (rows=8116 width=4) Output:["_col0"] - Filter Operator [FIL_387] (rows=8116 width=98) + Filter Operator [FIL_389] (rows=8116 width=98) predicate:CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-08-04 00:00:00' AND TIMESTAMP'1998-09-03 00:00:00' TableScan [TS_5] (rows=73049 width=98) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] <-Reducer 30 [SIMPLE_EDGE] SHUFFLE [RS_56] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_359] (rows=464045263 width=326) - Conds:RS_435._col2, _col4=RS_437._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col9","_col10"] + Merge Join Operator [MERGEJOIN_361] (rows=464045263 width=326) + Conds:RS_437._col2, _col4=RS_439._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col9","_col10"] <-Map 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_435] + SHUFFLE [RS_437] PartitionCols:_col2, _col4 - Select Operator [SEL_434] (rows=283691906 width=243) + Select Operator [SEL_436] (rows=283691906 width=243) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_433] (rows=283691906 width=243) + Filter Operator [FIL_435] (rows=283691906 width=243) predicate:(cs_promo_sk is not null and cs_sold_date_sk is not null and cs_catalog_page_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_57_date_dim_d_date_sk_min) AND DynamicValue(RS_57_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_57_date_dim_d_date_sk_bloom_filter))) TableScan [TS_37] (rows=287989836 width=243) 
default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_catalog_page_sk","cs_item_sk","cs_promo_sk","cs_order_number","cs_ext_sales_price","cs_net_profit"] <-Reducer 19 [BROADCAST_EDGE] vectorized - BROADCAST [RS_432] - Group By Operator [GBY_431] (rows=1 width=12) + BROADCAST [RS_434] + Group By Operator [GBY_433] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_399] - Group By Operator [GBY_396] (rows=1 width=12) + SHUFFLE [RS_401] + Group By Operator [GBY_398] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_392] (rows=8116 width=4) + Select Operator [SEL_394] (rows=8116 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_388] + Please refer to the previous Select Operator [SEL_390] <-Map 31 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_437] + SHUFFLE [RS_439] PartitionCols:_col0, _col1 - Select Operator [SEL_436] (rows=28798881 width=227) + Select Operator [SEL_438] (rows=28798881 width=227) Output:["_col0","_col1","_col2","_col3"] TableScan [TS_40] (rows=28798881 width=227) default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number","cr_return_amount","cr_net_loss"] <-Reducer 24 [CONTAINS] vectorized - Reduce Output Operator [RS_458] + Reduce Output Operator [RS_460] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_457] (rows=59581 width=627) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Top N Key Operator [TNK_456] (rows=39721 width=618) - keys:_col0, _col1, 0L,top n:100 - Select Operator [SEL_455] (rows=53 width=615) + Top N Key Operator [TNK_459] (rows=59581 width=627) + keys:_col0, 
_col1,top n:100 + Group By Operator [GBY_458] (rows=59581 width=627) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L + Select Operator [SEL_457] (rows=53 width=615) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_454] (rows=53 width=436) + Group By Operator [GBY_456] (rows=53 width=436) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0 <-Reducer 23 [SIMPLE_EDGE] SHUFFLE [RS_109] @@ -380,84 +380,84 @@ Stage-0 Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col1)","sum(_col2)","sum(_col3)"],keys:_col0 Select Operator [SEL_106] (rows=84869669 width=323) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_368] (rows=84869669 width=323) - Conds:RS_103._col2=RS_453._col0(Inner),Output:["_col5","_col6","_col9","_col10","_col15"] + Merge Join Operator [MERGEJOIN_370] (rows=84869669 width=323) + Conds:RS_103._col2=RS_455._col0(Inner),Output:["_col5","_col6","_col9","_col10","_col15"] <-Map 36 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_453] + SHUFFLE [RS_455] PartitionCols:_col0 - Select Operator [SEL_452] (rows=84 width=104) + Select Operator [SEL_454] (rows=84 width=104) Output:["_col0","_col1"] TableScan [TS_89] (rows=84 width=104) default@web_site,web_site,Tbl:COMPLETE,Col:COMPLETE,Output:["web_site_sk","web_site_id"] <-Reducer 22 [SIMPLE_EDGE] SHUFFLE [RS_103] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_367] (rows=84869669 width=227) - Conds:RS_100._col3=RS_417._col0(Inner),Output:["_col2","_col5","_col6","_col9","_col10"] + Merge Join Operator [MERGEJOIN_369] (rows=84869669 width=227) + Conds:RS_100._col3=RS_419._col0(Inner),Output:["_col2","_col5","_col6","_col9","_col10"] <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_417] + SHUFFLE [RS_419] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_414] + Please refer to the 
previous Select Operator [SEL_416] <-Reducer 21 [SIMPLE_EDGE] SHUFFLE [RS_100] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_366] (rows=84869669 width=231) - Conds:RS_97._col1=RS_412._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col9","_col10"] + Merge Join Operator [MERGEJOIN_368] (rows=84869669 width=231) + Conds:RS_97._col1=RS_414._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col9","_col10"] <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_412] + SHUFFLE [RS_414] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_409] + Please refer to the previous Select Operator [SEL_411] <-Reducer 20 [SIMPLE_EDGE] SHUFFLE [RS_97] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_365] (rows=254608997 width=359) - Conds:RS_94._col0=RS_393._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col9","_col10"] + Merge Join Operator [MERGEJOIN_367] (rows=254608997 width=359) + Conds:RS_94._col0=RS_395._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col9","_col10"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_393] + SHUFFLE [RS_395] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_388] + Please refer to the previous Select Operator [SEL_390] <-Reducer 34 [SIMPLE_EDGE] SHUFFLE [RS_94] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_364] (rows=254608997 width=363) - Conds:RS_449._col1, _col4=RS_451._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col9","_col10"] + Merge Join Operator [MERGEJOIN_366] (rows=254608997 width=363) + Conds:RS_451._col1, _col4=RS_453._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col9","_col10"] <-Map 33 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_449] + SHUFFLE [RS_451] PartitionCols:_col1, _col4 - Select Operator [SEL_448] (rows=143894769 width=243) + Select Operator [SEL_450] (rows=143894769 width=243) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator 
[FIL_447] (rows=143894769 width=243) + Filter Operator [FIL_449] (rows=143894769 width=243) predicate:(ws_promo_sk is not null and ws_web_site_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_95_date_dim_d_date_sk_min) AND DynamicValue(RS_95_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_95_date_dim_d_date_sk_bloom_filter))) TableScan [TS_75] (rows=144002668 width=243) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_web_site_sk","ws_promo_sk","ws_order_number","ws_ext_sales_price","ws_net_profit"] <-Reducer 25 [BROADCAST_EDGE] vectorized - BROADCAST [RS_446] - Group By Operator [GBY_445] (rows=1 width=12) + BROADCAST [RS_448] + Group By Operator [GBY_447] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_400] - Group By Operator [GBY_397] (rows=1 width=12) + SHUFFLE [RS_402] + Group By Operator [GBY_399] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_394] (rows=8116 width=4) + Select Operator [SEL_396] (rows=8116 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_388] + Please refer to the previous Select Operator [SEL_390] <-Map 35 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_451] + SHUFFLE [RS_453] PartitionCols:_col0, _col1 - Select Operator [SEL_450] (rows=14398467 width=221) + Select Operator [SEL_452] (rows=14398467 width=221) Output:["_col0","_col1","_col2","_col3"] TableScan [TS_78] (rows=14398467 width=221) default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_item_sk","wr_order_number","wr_return_amt","wr_net_loss"] <-Reducer 7 [CONTAINS] vectorized - Reduce Output Operator [RS_424] + Reduce Output Operator [RS_426] 
PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_423] (rows=59581 width=627) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Top N Key Operator [TNK_422] (rows=39721 width=618) - keys:_col0, _col1, 0L,top n:100 - Select Operator [SEL_421] (rows=822 width=617) + Top N Key Operator [TNK_425] (rows=59581 width=627) + keys:_col0, _col1,top n:100 + Group By Operator [GBY_424] (rows=59581 width=627) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L + Select Operator [SEL_423] (rows=822 width=617) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_420] (rows=822 width=436) + Group By Operator [GBY_422] (rows=822 width=436) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0 <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_34] @@ -466,71 +466,71 @@ Stage-0 Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col1)","sum(_col2)","sum(_col3)"],keys:_col0 Select Operator [SEL_31] (rows=270716624 width=305) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_358] (rows=270716624 width=305) - Conds:RS_28._col2=RS_419._col0(Inner),Output:["_col5","_col6","_col9","_col10","_col15"] + Merge Join Operator [MERGEJOIN_360] (rows=270716624 width=305) + Conds:RS_28._col2=RS_421._col0(Inner),Output:["_col5","_col6","_col9","_col10","_col15"] <-Map 28 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_419] + SHUFFLE [RS_421] PartitionCols:_col0 - Select Operator [SEL_418] (rows=1704 width=104) + Select Operator [SEL_420] (rows=1704 width=104) Output:["_col0","_col1"] TableScan [TS_14] (rows=1704 width=104) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id"] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_28] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_357] (rows=270716624 
width=208) - Conds:RS_25._col3=RS_415._col0(Inner),Output:["_col2","_col5","_col6","_col9","_col10"] + Merge Join Operator [MERGEJOIN_359] (rows=270716624 width=208) + Conds:RS_25._col3=RS_417._col0(Inner),Output:["_col2","_col5","_col6","_col9","_col10"] <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_415] + SHUFFLE [RS_417] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_414] + Please refer to the previous Select Operator [SEL_416] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_25] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_356] (rows=270716624 width=212) - Conds:RS_22._col1=RS_410._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col9","_col10"] + Merge Join Operator [MERGEJOIN_358] (rows=270716624 width=212) + Conds:RS_22._col1=RS_412._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col9","_col10"] <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_410] + SHUFFLE [RS_412] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_409] + Please refer to the previous Select Operator [SEL_411] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_22] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_355] (rows=812149846 width=370) - Conds:RS_19._col0=RS_389._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col9","_col10"] + Merge Join Operator [MERGEJOIN_357] (rows=812149846 width=370) + Conds:RS_19._col0=RS_391._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col9","_col10"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_389] + SHUFFLE [RS_391] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_388] + Please refer to the previous Select Operator [SEL_390] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_19] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_354] (rows=812149846 width=374) - Conds:RS_405._col1, _col4=RS_407._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col9","_col10"] + Merge Join Operator [MERGEJOIN_356] (rows=812149846 width=374) + 
Conds:RS_407._col1, _col4=RS_409._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col9","_col10"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_405] + SHUFFLE [RS_407] PartitionCols:_col1, _col4 - Select Operator [SEL_404] (rows=501693263 width=233) + Select Operator [SEL_406] (rows=501693263 width=233) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_403] (rows=501693263 width=233) + Filter Operator [FIL_405] (rows=501693263 width=233) predicate:(ss_sold_date_sk is not null and ss_promo_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_20_date_dim_d_date_sk_min) AND DynamicValue(RS_20_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_20_date_dim_d_date_sk_bloom_filter))) TableScan [TS_0] (rows=575995635 width=233) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_promo_sk","ss_ticket_number","ss_ext_sales_price","ss_net_profit"] <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_402] - Group By Operator [GBY_401] (rows=1 width=12) + BROADCAST [RS_404] + Group By Operator [GBY_403] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_398] - Group By Operator [GBY_395] (rows=1 width=12) + SHUFFLE [RS_400] + Group By Operator [GBY_397] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_390] (rows=8116 width=4) + Select Operator [SEL_392] (rows=8116 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_388] + Please refer to the previous Select Operator [SEL_390] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_407] + SHUFFLE [RS_409] PartitionCols:_col0, _col1 - Select 
Operator [SEL_406] (rows=57591150 width=224) + Select Operator [SEL_408] (rows=57591150 width=224) Output:["_col0","_col1","_col2","_col3"] TableScan [TS_3] (rows=57591150 width=224) default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_ticket_number","sr_return_amt","sr_net_loss"] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query82.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query82.q.out index 8dd6ae9f0f..c1d17eb803 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query82.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query82.q.out @@ -56,78 +56,78 @@ Stage-0 limit:100 Stage-1 Reducer 5 vectorized - File Output Operator [FS_100] - Limit [LIM_99] (rows=1 width=396) + File Output Operator [FS_105] + Limit [LIM_104] (rows=1 width=396) Number of rows:100 - Select Operator [SEL_98] (rows=1 width=396) + Select Operator [SEL_103] (rows=1 width=396) Output:["_col0","_col1","_col2"] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_97] - Group By Operator [GBY_96] (rows=1 width=396) + SHUFFLE [RS_102] + Group By Operator [GBY_101] (rows=1 width=396) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_23] PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_22] (rows=2 width=396) Output:["_col0","_col1","_col2"],keys:_col2, _col3, _col4 - Top N Key Operator [TNK_42] (rows=11627 width=396) + Top N Key Operator [TNK_47] (rows=11627 width=396) keys:_col2, _col3, _col4,top n:100 - Merge Join Operator [MERGEJOIN_78] (rows=11627 width=396) + Merge Join Operator [MERGEJOIN_83] (rows=11627 width=396) Conds:RS_18._col1=RS_19._col1(Inner),Output:["_col2","_col3","_col4"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_76] (rows=3564040 width=400) - Conds:RS_89._col0=RS_81._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_81] 
(rows=3564040 width=400) + Conds:RS_94._col0=RS_86._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 6 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_81] + PARTITION_ONLY_SHUFFLE [RS_86] PartitionCols:_col0 - Select Operator [SEL_80] (rows=297 width=400) + Select Operator [SEL_85] (rows=297 width=400) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_79] (rows=297 width=404) + Filter Operator [FIL_84] (rows=297 width=404) predicate:(i_current_price BETWEEN 30 AND 60 and (i_manufact_id) IN (437, 129, 727, 663)) TableScan [TS_2] (rows=462000 width=403) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_item_desc","i_current_price","i_manufact_id"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_89] + SHUFFLE [RS_94] PartitionCols:_col0 - Select Operator [SEL_88] (rows=575995635 width=4) + Select Operator [SEL_93] (rows=575995635 width=4) Output:["_col0"] - Filter Operator [FIL_87] (rows=575995635 width=4) + Filter Operator [FIL_92] (rows=575995635 width=4) predicate:(ss_item_sk BETWEEN DynamicValue(RS_16_item_i_item_sk_min) AND DynamicValue(RS_16_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_16_item_i_item_sk_bloom_filter))) TableScan [TS_0] (rows=575995635 width=4) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk"] <-Reducer 7 [BROADCAST_EDGE] vectorized - BROADCAST [RS_86] - Group By Operator [GBY_85] (rows=1 width=12) + BROADCAST [RS_91] + Group By Operator [GBY_90] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_84] - Group By Operator [GBY_83] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_89] + Group By Operator [GBY_88] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator 
[SEL_82] (rows=297 width=4) + Select Operator [SEL_87] (rows=297 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_80] + Please refer to the previous Select Operator [SEL_85] <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_19] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_77] (rows=1879072 width=4) - Conds:RS_92._col0=RS_95._col0(Inner),Output:["_col1"] + Merge Join Operator [MERGEJOIN_82] (rows=1879072 width=4) + Conds:RS_97._col0=RS_100._col0(Inner),Output:["_col1"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_95] + SHUFFLE [RS_100] PartitionCols:_col0 - Select Operator [SEL_94] (rows=8116 width=4) + Select Operator [SEL_99] (rows=8116 width=4) Output:["_col0"] - Filter Operator [FIL_93] (rows=8116 width=98) + Filter Operator [FIL_98] (rows=8116 width=98) predicate:CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2002-05-30 00:00:00' AND TIMESTAMP'2002-07-29 00:00:00' TableScan [TS_8] (rows=73049 width=98) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_92] + SHUFFLE [RS_97] PartitionCols:_col0 - Select Operator [SEL_91] (rows=16912800 width=8) + Select Operator [SEL_96] (rows=16912800 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_90] (rows=16912800 width=11) + Filter Operator [FIL_95] (rows=16912800 width=11) predicate:inv_quantity_on_hand BETWEEN 100 AND 500 TableScan [TS_5] (rows=37584000 width=11) default@inventory,inventory,Tbl:COMPLETE,Col:COMPLETE,Output:["inv_date_sk","inv_item_sk","inv_quantity_on_hand"] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query99.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query99.q.out index c77a73f4d5..53102977dd 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query99.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query99.q.out @@ -95,88 +95,88 @@ Stage-0 limit:-1 Stage-1 Reducer 7 vectorized - File Output Operator [FS_120] - Limit 
[LIM_119] (rows=100 width=590) + File Output Operator [FS_125] + Limit [LIM_124] (rows=100 width=590) Number of rows:100 - Select Operator [SEL_118] (rows=3920468 width=590) + Select Operator [SEL_123] (rows=3920468 width=590) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_117] - Select Operator [SEL_116] (rows=3920468 width=590) + SHUFFLE [RS_122] + Select Operator [SEL_121] (rows=3920468 width=590) Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Group By Operator [GBY_115] (rows=3920468 width=406) + Group By Operator [GBY_120] (rows=3920468 width=406) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_26] PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_25] (rows=7840936 width=406) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col4)","sum(_col5)","sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col13, _col11, _col15 - Top N Key Operator [TNK_53] (rows=15681873 width=386) + Top N Key Operator [TNK_56] (rows=15681873 width=386) keys:_col13, _col11, _col15,top n:100 - Merge Join Operator [MERGEJOIN_97] (rows=15681873 width=386) - Conds:RS_21._col1=RS_114._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col8","_col11","_col13","_col15"] + Merge Join Operator [MERGEJOIN_102] (rows=15681873 width=386) + Conds:RS_21._col1=RS_119._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col8","_col11","_col13","_col15"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_114] + SHUFFLE [RS_119] PartitionCols:_col0 - Select Operator [SEL_113] (rows=60 width=102) + Select Operator [SEL_118] (rows=60 width=102) Output:["_col0","_col1"] TableScan [TS_10] (rows=60 width=102) 
default@call_center,call_center,Tbl:COMPLETE,Col:COMPLETE,Output:["cc_call_center_sk","cc_name"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_96] (rows=15681873 width=291) - Conds:RS_18._col3=RS_112._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col8","_col11","_col13"] + Merge Join Operator [MERGEJOIN_101] (rows=15681873 width=291) + Conds:RS_18._col3=RS_117._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col8","_col11","_col13"] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_112] + SHUFFLE [RS_117] PartitionCols:_col0 - Select Operator [SEL_111] (rows=27 width=188) + Select Operator [SEL_116] (rows=27 width=188) Output:["_col0","_col1"] TableScan [TS_8] (rows=27 width=104) default@warehouse,warehouse,Tbl:COMPLETE,Col:COMPLETE,Output:["w_warehouse_sk","w_warehouse_name"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_95] (rows=15681873 width=111) - Conds:RS_15._col2=RS_99._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col11"] + Merge Join Operator [MERGEJOIN_100] (rows=15681873 width=111) + Conds:RS_15._col2=RS_104._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7","_col8","_col11"] <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_99] + SHUFFLE [RS_104] PartitionCols:_col0 - Select Operator [SEL_98] (rows=1 width=88) + Select Operator [SEL_103] (rows=1 width=88) Output:["_col0","_col1"] TableScan [TS_6] (rows=1 width=88) default@ship_mode,ship_mode,Tbl:COMPLETE,Col:COMPLETE,Output:["sm_ship_mode_sk","sm_type"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_15] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_94] (rows=282273729 width=31) - Conds:RS_107._col0=RS_110._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Merge Join Operator [MERGEJOIN_99] (rows=282273729 width=31) + 
Conds:RS_112._col0=RS_115._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_107] + SHUFFLE [RS_112] PartitionCols:_col0 - Select Operator [SEL_106] (rows=282273729 width=35) + Select Operator [SEL_111] (rows=282273729 width=35) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Filter Operator [FIL_105] (rows=282273729 width=19) + Filter Operator [FIL_110] (rows=282273729 width=19) predicate:(cs_warehouse_sk is not null and cs_ship_date_sk is not null and cs_ship_mode_sk is not null and cs_call_center_sk is not null and cs_ship_mode_sk BETWEEN DynamicValue(RS_16_ship_mode_sm_ship_mode_sk_min) AND DynamicValue(RS_16_ship_mode_sm_ship_mode_sk_max) and in_bloom_filter(cs_ship_mode_sk, DynamicValue(RS_16_ship_mode_sm_ship_mode_sk_bloom_filter))) TableScan [TS_0] (rows=287989836 width=19) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_ship_date_sk","cs_call_center_sk","cs_ship_mode_sk","cs_warehouse_sk"] <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_104] - Group By Operator [GBY_103] (rows=1 width=12) + BROADCAST [RS_109] + Group By Operator [GBY_108] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_102] - Group By Operator [GBY_101] (rows=1 width=12) + SHUFFLE [RS_107] + Group By Operator [GBY_106] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_100] (rows=1 width=4) + Select Operator [SEL_105] (rows=1 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_98] + Please refer to the previous Select Operator [SEL_103] <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_110] + SHUFFLE [RS_115] PartitionCols:_col0 - 
Select Operator [SEL_109] (rows=317 width=4) + Select Operator [SEL_114] (rows=317 width=4) Output:["_col0"] - Filter Operator [FIL_108] (rows=317 width=8) + Filter Operator [FIL_113] (rows=317 width=8) predicate:d_month_seq BETWEEN 1212 AND 1223 TableScan [TS_3] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] diff --git ql/src/test/results/clientpositive/perf/tez/query10.q.out ql/src/test/results/clientpositive/perf/tez/query10.q.out index b346a5c5fb..0b332453d9 100644 --- ql/src/test/results/clientpositive/perf/tez/query10.q.out +++ ql/src/test/results/clientpositive/perf/tez/query10.q.out @@ -157,120 +157,120 @@ Stage-0 limit:100 Stage-1 Reducer 8 vectorized - File Output Operator [FS_231] - Limit [LIM_230] (rows=1 width=419) + File Output Operator [FS_236] + Limit [LIM_235] (rows=1 width=419) Number of rows:100 - Select Operator [SEL_229] (rows=1 width=419) + Select Operator [SEL_234] (rows=1 width=419) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] <-Reducer 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_228] - Select Operator [SEL_227] (rows=1 width=419) + SHUFFLE [RS_233] + Select Operator [SEL_232] (rows=1 width=419) Output:["_col0","_col1","_col2","_col3","_col4","_col6","_col8","_col10","_col12"] - Group By Operator [GBY_226] (rows=1 width=379) + Group By Operator [GBY_231] (rows=1 width=379) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7 <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_67] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Group By Operator [GBY_66] (rows=3 width=379) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["count()"],keys:_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Top N 
Key Operator [TNK_103] (rows=1401496 width=379) - keys:_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13,top n:100 - Select Operator [SEL_65] (rows=1401496 width=379) - Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + Select Operator [SEL_65] (rows=1401496 width=379) + Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + Top N Key Operator [TNK_106] (rows=1401496 width=379) + keys:_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13,top n:100 Filter Operator [FIL_64] (rows=1401496 width=379) predicate:(_col14 is not null or _col16 is not null) - Merge Join Operator [MERGEJOIN_181] (rows=1401496 width=379) - Conds:RS_61._col0=RS_225._col1(Left Outer),Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col16"] + Merge Join Operator [MERGEJOIN_186] (rows=1401496 width=379) + Conds:RS_61._col0=RS_230._col1(Left Outer),Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col16"] <-Reducer 19 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_225] + SHUFFLE [RS_230] PartitionCols:_col1 - Select Operator [SEL_224] (rows=1401496 width=7) + Select Operator [SEL_229] (rows=1401496 width=7) Output:["_col0","_col1"] - Group By Operator [GBY_223] (rows=1401496 width=3) + Group By Operator [GBY_228] (rows=1401496 width=3) Output:["_col0"],keys:KEY._col0 <-Reducer 18 [SIMPLE_EDGE] SHUFFLE [RS_44] PartitionCols:_col0 Group By Operator [GBY_43] (rows=285115246 width=3) Output:["_col0"],keys:_col1 - Merge Join Operator [MERGEJOIN_178] (rows=285115246 width=3) - Conds:RS_222._col0=RS_197._col0(Inner),Output:["_col1"] + Merge Join Operator [MERGEJOIN_183] (rows=285115246 width=3) + Conds:RS_227._col0=RS_202._col0(Inner),Output:["_col1"] <-Map 13 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_197] + PARTITION_ONLY_SHUFFLE [RS_202] PartitionCols:_col0 - Select Operator [SEL_192] (rows=201 width=4) + Select Operator [SEL_197] (rows=201 width=4) 
Output:["_col0"] - Filter Operator [FIL_191] (rows=201 width=12) + Filter Operator [FIL_196] (rows=201 width=12) predicate:((d_year = 2002) and d_moy BETWEEN 4 AND 7 and d_date_sk is not null) TableScan [TS_12] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] <-Map 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_222] + SHUFFLE [RS_227] PartitionCols:_col0 - Select Operator [SEL_221] (rows=285115246 width=7) + Select Operator [SEL_226] (rows=285115246 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_220] (rows=285115246 width=7) + Filter Operator [FIL_225] (rows=285115246 width=7) predicate:(cs_ship_customer_sk is not null and cs_sold_date_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_40_date_dim_d_date_sk_min) AND DynamicValue(RS_40_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_40_date_dim_d_date_sk_bloom_filter))) TableScan [TS_33] (rows=287989836 width=7) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_ship_customer_sk"] <-Reducer 20 [BROADCAST_EDGE] vectorized - BROADCAST [RS_219] - Group By Operator [GBY_218] (rows=1 width=12) + BROADCAST [RS_224] + Group By Operator [GBY_223] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_204] - Group By Operator [GBY_201] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_209] + Group By Operator [GBY_206] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_198] (rows=201 width=4) + Select Operator [SEL_203] (rows=201 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_192] + Please refer to the previous Select Operator [SEL_197] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE 
[RS_61] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_180] (rows=1414922 width=379) - Conds:RS_58._col0=RS_217._col1(Left Outer),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] + Merge Join Operator [MERGEJOIN_185] (rows=1414922 width=379) + Conds:RS_58._col0=RS_222._col1(Left Outer),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] <-Reducer 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_217] + SHUFFLE [RS_222] PartitionCols:_col1 - Select Operator [SEL_216] (rows=1414922 width=7) + Select Operator [SEL_221] (rows=1414922 width=7) Output:["_col0","_col1"] - Group By Operator [GBY_215] (rows=1414922 width=3) + Group By Operator [GBY_220] (rows=1414922 width=3) Output:["_col0"],keys:KEY._col0 <-Reducer 15 [SIMPLE_EDGE] SHUFFLE [RS_30] PartitionCols:_col0 Group By Operator [GBY_29] (rows=143930993 width=3) Output:["_col0"],keys:_col1 - Merge Join Operator [MERGEJOIN_177] (rows=143930993 width=3) - Conds:RS_214._col0=RS_195._col0(Inner),Output:["_col1"] + Merge Join Operator [MERGEJOIN_182] (rows=143930993 width=3) + Conds:RS_219._col0=RS_200._col0(Inner),Output:["_col1"] <-Map 13 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_195] + PARTITION_ONLY_SHUFFLE [RS_200] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_192] + Please refer to the previous Select Operator [SEL_197] <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_214] + SHUFFLE [RS_219] PartitionCols:_col0 - Select Operator [SEL_213] (rows=143930993 width=7) + Select Operator [SEL_218] (rows=143930993 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_212] (rows=143930993 width=7) + Filter Operator [FIL_217] (rows=143930993 width=7) predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_26_date_dim_d_date_sk_min) AND DynamicValue(RS_26_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, 
DynamicValue(RS_26_date_dim_d_date_sk_bloom_filter))) TableScan [TS_19] (rows=144002668 width=7) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_211] - Group By Operator [GBY_210] (rows=1 width=12) + BROADCAST [RS_216] + Group By Operator [GBY_215] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_203] - Group By Operator [GBY_200] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_208] + Group By Operator [GBY_205] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_196] (rows=201 width=4) + Select Operator [SEL_201] (rows=201 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_192] + Please refer to the previous Select Operator [SEL_197] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_58] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_179] (rows=525327388 width=375) + Merge Join Operator [MERGEJOIN_184] (rows=525327388 width=375) Conds:RS_55._col0=RS_56._col0(Left Semi),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_56] @@ -279,66 +279,66 @@ Stage-0 Output:["_col0"],keys:_col0 Select Operator [SEL_18] (rows=525327388 width=3) Output:["_col0"] - Merge Join Operator [MERGEJOIN_176] (rows=525327388 width=3) - Conds:RS_209._col0=RS_193._col0(Inner),Output:["_col1"] + Merge Join Operator [MERGEJOIN_181] (rows=525327388 width=3) + Conds:RS_214._col0=RS_198._col0(Inner),Output:["_col1"] <-Map 13 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_193] + PARTITION_ONLY_SHUFFLE [RS_198] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_192] + Please refer to the 
previous Select Operator [SEL_197] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_209] + SHUFFLE [RS_214] PartitionCols:_col0 - Select Operator [SEL_208] (rows=525327388 width=7) + Select Operator [SEL_213] (rows=525327388 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_207] (rows=525327388 width=7) + Filter Operator [FIL_212] (rows=525327388 width=7) predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) TableScan [TS_9] (rows=575995635 width=7) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk"] <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_206] - Group By Operator [GBY_205] (rows=1 width=12) + BROADCAST [RS_211] + Group By Operator [GBY_210] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_202] - Group By Operator [GBY_199] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_207] + Group By Operator [GBY_204] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_194] (rows=201 width=4) + Select Operator [SEL_199] (rows=201 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_192] + Please refer to the previous Select Operator [SEL_197] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_55] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_175] (rows=228127 width=375) - Conds:RS_50._col1=RS_190._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] + Merge Join Operator [MERGEJOIN_180] (rows=228127 width=375) + 
Conds:RS_50._col1=RS_195._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_190] + SHUFFLE [RS_195] PartitionCols:_col0 - Select Operator [SEL_189] (rows=1861800 width=375) + Select Operator [SEL_194] (rows=1861800 width=375) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Filter Operator [FIL_188] (rows=1861800 width=375) + Filter Operator [FIL_193] (rows=1861800 width=375) predicate:cd_demo_sk is not null TableScan [TS_6] (rows=1861800 width=375) default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status","cd_purchase_estimate","cd_credit_rating","cd_dep_count","cd_dep_employed_count","cd_dep_college_count"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_50] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_174] (rows=224946 width=4) - Conds:RS_184._col2=RS_187._col0(Inner),Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_179] (rows=224946 width=4) + Conds:RS_189._col2=RS_192._col0(Inner),Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_184] + SHUFFLE [RS_189] PartitionCols:_col2 - Select Operator [SEL_183] (rows=77201384 width=11) + Select Operator [SEL_188] (rows=77201384 width=11) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_182] (rows=77201384 width=11) + Filter Operator [FIL_187] (rows=77201384 width=11) predicate:(c_current_cdemo_sk is not null and c_current_addr_sk is not null and c_customer_sk is not null) TableScan [TS_0] (rows=80000000 width=11) default@customer,c,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"] <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_187] + SHUFFLE [RS_192] PartitionCols:_col0 - Select Operator [SEL_186] (rows=116550 width=102) + Select Operator [SEL_191] (rows=116550 width=102) Output:["_col0"] - Filter Operator [FIL_185] 
(rows=116550 width=102) + Filter Operator [FIL_190] (rows=116550 width=102) predicate:((ca_county) IN ('Walker County', 'Richland County', 'Gaines County', 'Douglas County', 'Dona Ana County') and ca_address_sk is not null) TableScan [TS_3] (rows=40000000 width=102) default@customer_address,ca,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county"] diff --git ql/src/test/results/clientpositive/perf/tez/query14.q.out ql/src/test/results/clientpositive/perf/tez/query14.q.out index 069fad2b4a..00bc4cb026 100644 --- ql/src/test/results/clientpositive/perf/tez/query14.q.out +++ ql/src/test/results/clientpositive/perf/tez/query14.q.out @@ -1,6 +1,6 @@ -Warning: Shuffle Join MERGEJOIN[1173][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product -Warning: Shuffle Join MERGEJOIN[1180][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 16' is a cross product -Warning: Shuffle Join MERGEJOIN[1187][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 22' is a cross product +Warning: Shuffle Join MERGEJOIN[1175][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product +Warning: Shuffle Join MERGEJOIN[1182][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 16' is a cross product +Warning: Shuffle Join MERGEJOIN[1189][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 22' is a cross product PREHOOK: query: explain with cross_items as (select i_item_sk ss_item_sk @@ -296,36 +296,36 @@ Stage-0 limit:100 Stage-1 Reducer 9 vectorized - File Output Operator [FS_1346] - Limit [LIM_1345] (rows=100 width=223) + File Output Operator [FS_1348] + Limit [LIM_1347] (rows=100 width=223) Number of rows:100 - Select Operator [SEL_1344] (rows=304320 width=223) + Select Operator [SEL_1346] (rows=304320 width=223) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1343] - Select Operator [SEL_1342] (rows=304320 width=223) + SHUFFLE [RS_1345] + Select Operator [SEL_1344] (rows=304320 width=223) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Group By Operator [GBY_1341] (rows=304320 width=231) + Group By Operator [GBY_1343] (rows=304320 width=231) Output:["_col0","_col1","_col2","_col3","_col5","_col6"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 <-Union 7 [SIMPLE_EDGE] <-Reducer 16 [CONTAINS] - Reduce Output Operator [RS_1186] + Reduce Output Operator [RS_1188] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_1185] (rows=304320 width=231) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3, 0L - Top N Key Operator [TNK_1184] (rows=121728 width=221) - keys:_col0, _col1, _col2, _col3, 0L,top n:100 - Select Operator [SEL_1182] (rows=40576 width=223) + Top N Key Operator [TNK_1187] (rows=304320 width=231) + keys:_col0, _col1, _col2, _col3,top n:100 + Group By Operator [GBY_1186] (rows=304320 width=231) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3, 0L + Select Operator [SEL_1184] (rows=40576 width=223) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_1181] (rows=40576 width=244) + Filter Operator [FIL_1183] (rows=40576 width=244) predicate:(_col3 > _col5) - Merge Join Operator [MERGEJOIN_1180] (rows=121728 width=244) + Merge Join Operator [MERGEJOIN_1182] (rows=121728 width=244) Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 15 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1358] - Filter Operator [FIL_1357] (rows=121728 width=132) + PARTITION_ONLY_SHUFFLE [RS_1360] + Filter Operator [FIL_1359] (rows=121728 width=132) predicate:_col3 is not null - Group By Operator [GBY_1356] (rows=121728 width=132) + Group By Operator [GBY_1358] (rows=121728 width=132) 
Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_244] @@ -334,378 +334,378 @@ Stage-0 Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col3)","count()"],keys:_col0, _col1, _col2 Select Operator [SEL_241] (rows=286549727 width=127) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_1168] (rows=286549727 width=127) + Merge Join Operator [MERGEJOIN_1170] (rows=286549727 width=127) Conds:RS_238._col1=RS_239._col0(Left Semi),Output:["_col2","_col3","_col6","_col7","_col8"] <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_238] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1144] (rows=286549727 width=131) - Conds:RS_233._col1=RS_1326._col0(Inner),Output:["_col1","_col2","_col3","_col6","_col7","_col8"] + Merge Join Operator [MERGEJOIN_1146] (rows=286549727 width=131) + Conds:RS_233._col1=RS_1328._col0(Inner),Output:["_col1","_col2","_col3","_col6","_col7","_col8"] <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1326] + SHUFFLE [RS_1328] PartitionCols:_col0 - Select Operator [SEL_1317] (rows=462000 width=15) + Select Operator [SEL_1319] (rows=462000 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1308] (rows=462000 width=15) + Filter Operator [FIL_1310] (rows=462000 width=15) predicate:i_item_sk is not null TableScan [TS_6] (rows=462000 width=15) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_brand_id","i_class_id","i_category_id"] <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_233] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1143] (rows=286549727 width=119) - Conds:RS_1351._col0=RS_1288._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_1145] (rows=286549727 width=119) + Conds:RS_1353._col0=RS_1290._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 10 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1288] + 
PARTITION_ONLY_SHUFFLE [RS_1290] PartitionCols:_col0 - Select Operator [SEL_1285] (rows=50 width=4) + Select Operator [SEL_1287] (rows=50 width=4) Output:["_col0"] - Filter Operator [FIL_1284] (rows=50 width=12) + Filter Operator [FIL_1286] (rows=50 width=12) predicate:((d_year = 2000) and (d_moy = 11) and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] <-Map 79 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1351] + SHUFFLE [RS_1353] PartitionCols:_col0 - Select Operator [SEL_1350] (rows=286549727 width=123) + Select Operator [SEL_1352] (rows=286549727 width=123) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1349] (rows=286549727 width=123) + Filter Operator [FIL_1351] (rows=286549727 width=123) predicate:(cs_sold_date_sk is not null and cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_231_date_dim_d_date_sk_min) AND DynamicValue(RS_231_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_231_date_dim_d_date_sk_bloom_filter))) TableScan [TS_146] (rows=287989836 width=123) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk","cs_quantity","cs_list_price"] <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1348] - Group By Operator [GBY_1347] (rows=1 width=12) + BROADCAST [RS_1350] + Group By Operator [GBY_1349] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1296] - Group By Operator [GBY_1293] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_1298] + Group By Operator [GBY_1295] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1289] (rows=50 width=4) + Select Operator 
[SEL_1291] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1285] + Please refer to the previous Select Operator [SEL_1287] <-Reducer 33 [SIMPLE_EDGE] SHUFFLE [RS_239] PartitionCols:_col0 Group By Operator [GBY_237] (rows=364 width=4) Output:["_col0"],keys:_col0 - Merge Join Operator [MERGEJOIN_1151] (rows=729 width=4) - Conds:RS_1327._col1, _col2, _col3=RS_1355._col0, _col1, _col2(Inner),Output:["_col0"] + Merge Join Operator [MERGEJOIN_1153] (rows=729 width=4) + Conds:RS_1329._col1, _col2, _col3=RS_1357._col0, _col1, _col2(Inner),Output:["_col0"] <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1327] + SHUFFLE [RS_1329] PartitionCols:_col1, _col2, _col3 - Select Operator [SEL_1318] (rows=458612 width=15) + Select Operator [SEL_1320] (rows=458612 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1309] (rows=458612 width=15) + Filter Operator [FIL_1311] (rows=458612 width=15) predicate:(i_category_id is not null and i_brand_id is not null and i_class_id is not null and i_item_sk is not null) Please refer to the previous TableScan [TS_6] <-Reducer 32 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1355] + SHUFFLE [RS_1357] PartitionCols:_col0, _col1, _col2 - Select Operator [SEL_1354] (rows=1 width=12) + Select Operator [SEL_1356] (rows=1 width=12) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1353] (rows=1 width=20) + Filter Operator [FIL_1355] (rows=1 width=20) predicate:(_col3 = 3L) - Group By Operator [GBY_1352] (rows=120960 width=20) + Group By Operator [GBY_1354] (rows=120960 width=20) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Union 31 [SIMPLE_EDGE] <-Reducer 30 [CONTAINS] vectorized - Reduce Output Operator [RS_1409] + Reduce Output Operator [RS_1411] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1408] (rows=120960 width=20) + Group By Operator [GBY_1410] (rows=120960 width=20) 
Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1407] (rows=120960 width=20) + Group By Operator [GBY_1409] (rows=120960 width=20) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 26 [SIMPLE_EDGE] SHUFFLE [RS_175] PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_28] (rows=3144960 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col4, _col5, _col6 - Merge Join Operator [MERGEJOIN_1134] (rows=546042657 width=11) - Conds:RS_24._col1=RS_1323._col0(Inner),Output:["_col4","_col5","_col6"] + Merge Join Operator [MERGEJOIN_1136] (rows=546042657 width=11) + Conds:RS_24._col1=RS_1325._col0(Inner),Output:["_col4","_col5","_col6"] <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1323] + SHUFFLE [RS_1325] PartitionCols:_col0 - Select Operator [SEL_1314] (rows=458612 width=15) + Select Operator [SEL_1316] (rows=458612 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1305] (rows=458612 width=15) + Filter Operator [FIL_1307] (rows=458612 width=15) predicate:(i_category_id is not null and i_brand_id is not null and i_class_id is not null and i_item_sk is not null) Please refer to the previous TableScan [TS_6] <-Reducer 47 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1133] (rows=550076554 width=4) - Conds:RS_1403._col0=RS_1381._col0(Inner),Output:["_col1"] + Merge Join Operator [MERGEJOIN_1135] (rows=550076554 width=4) + Conds:RS_1405._col0=RS_1383._col0(Inner),Output:["_col1"] <-Map 48 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1381] + SHUFFLE [RS_1383] PartitionCols:_col0 - Select Operator [SEL_1380] (rows=1957 width=4) + Select Operator [SEL_1382] (rows=1957 width=4) Output:["_col0"] - Filter Operator [FIL_1379] (rows=1957 width=8) + Filter Operator [FIL_1381] (rows=1957 width=8) predicate:(d_year BETWEEN 1999 AND 2001 and d_date_sk is 
not null) TableScan [TS_15] (rows=73049 width=8) default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] <-Map 46 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1403] + SHUFFLE [RS_1405] PartitionCols:_col0 - Select Operator [SEL_1402] (rows=550076554 width=7) + Select Operator [SEL_1404] (rows=550076554 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_1401] (rows=550076554 width=7) + Filter Operator [FIL_1403] (rows=550076554 width=7) predicate:(ss_sold_date_sk is not null and ss_item_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_22_d1_d_date_sk_min) AND DynamicValue(RS_22_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_22_d1_d_date_sk_bloom_filter))) TableScan [TS_12] (rows=575995635 width=7) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk"] <-Reducer 49 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1400] - Group By Operator [GBY_1399] (rows=1 width=12) + BROADCAST [RS_1402] + Group By Operator [GBY_1401] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 48 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1395] - Group By Operator [GBY_1391] (rows=1 width=12) + SHUFFLE [RS_1397] + Group By Operator [GBY_1393] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1382] (rows=1957 width=4) + Select Operator [SEL_1384] (rows=1957 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1380] + Please refer to the previous Select Operator [SEL_1382] <-Reducer 40 [CONTAINS] vectorized - Reduce Output Operator [RS_1423] + Reduce Output Operator [RS_1425] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1422] (rows=120960 width=20) + Group By Operator [GBY_1424] (rows=120960 width=20) 
Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1421] (rows=120960 width=20) + Group By Operator [GBY_1423] (rows=120960 width=20) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 38 [SIMPLE_EDGE] SHUFFLE [RS_195] PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_48] (rows=1693440 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col4, _col5, _col6 - Merge Join Operator [MERGEJOIN_1136] (rows=284448361 width=11) - Conds:RS_44._col1=RS_1324._col0(Inner),Output:["_col4","_col5","_col6"] + Merge Join Operator [MERGEJOIN_1138] (rows=284448361 width=11) + Conds:RS_44._col1=RS_1326._col0(Inner),Output:["_col4","_col5","_col6"] <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1324] + SHUFFLE [RS_1326] PartitionCols:_col0 - Select Operator [SEL_1315] (rows=458612 width=15) + Select Operator [SEL_1317] (rows=458612 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1306] (rows=458612 width=15) + Filter Operator [FIL_1308] (rows=458612 width=15) predicate:(i_category_id is not null and i_brand_id is not null and i_class_id is not null and i_item_sk is not null) Please refer to the previous TableScan [TS_6] <-Reducer 50 [SIMPLE_EDGE] SHUFFLE [RS_44] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1135] (rows=286549727 width=4) - Conds:RS_1417._col0=RS_1383._col0(Inner),Output:["_col1"] + Merge Join Operator [MERGEJOIN_1137] (rows=286549727 width=4) + Conds:RS_1419._col0=RS_1385._col0(Inner),Output:["_col1"] <-Map 48 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1383] + SHUFFLE [RS_1385] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1380] + Please refer to the previous Select Operator [SEL_1382] <-Map 64 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1417] + SHUFFLE [RS_1419] PartitionCols:_col0 - Select Operator [SEL_1416] (rows=286549727 width=7) + 
Select Operator [SEL_1418] (rows=286549727 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_1415] (rows=286549727 width=7) + Filter Operator [FIL_1417] (rows=286549727 width=7) predicate:(cs_sold_date_sk is not null and cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_42_d2_d_date_sk_min) AND DynamicValue(RS_42_d2_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_42_d2_d_date_sk_bloom_filter))) TableScan [TS_32] (rows=287989836 width=7) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk"] <-Reducer 51 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1414] - Group By Operator [GBY_1413] (rows=1 width=12) + BROADCAST [RS_1416] + Group By Operator [GBY_1415] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 48 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1396] - Group By Operator [GBY_1392] (rows=1 width=12) + SHUFFLE [RS_1398] + Group By Operator [GBY_1394] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1384] (rows=1957 width=4) + Select Operator [SEL_1386] (rows=1957 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1380] + Please refer to the previous Select Operator [SEL_1382] <-Reducer 44 [CONTAINS] vectorized - Reduce Output Operator [RS_1437] + Reduce Output Operator [RS_1439] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1436] (rows=120960 width=20) + Group By Operator [GBY_1438] (rows=120960 width=20) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1435] (rows=120960 width=20) + Group By Operator [GBY_1437] (rows=120960 width=20) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, 
KEY._col2 <-Reducer 42 [SIMPLE_EDGE] SHUFFLE [RS_216] PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_69] (rows=846720 width=19) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col4, _col5, _col6 - Merge Join Operator [MERGEJOIN_1138] (rows=142911107 width=11) - Conds:RS_65._col1=RS_1325._col0(Inner),Output:["_col4","_col5","_col6"] + Merge Join Operator [MERGEJOIN_1140] (rows=142911107 width=11) + Conds:RS_65._col1=RS_1327._col0(Inner),Output:["_col4","_col5","_col6"] <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1325] + SHUFFLE [RS_1327] PartitionCols:_col0 - Select Operator [SEL_1316] (rows=458612 width=15) + Select Operator [SEL_1318] (rows=458612 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1307] (rows=458612 width=15) + Filter Operator [FIL_1309] (rows=458612 width=15) predicate:(i_category_id is not null and i_brand_id is not null and i_class_id is not null and i_item_sk is not null) Please refer to the previous TableScan [TS_6] <-Reducer 52 [SIMPLE_EDGE] SHUFFLE [RS_65] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1137] (rows=143966864 width=4) - Conds:RS_1431._col0=RS_1385._col0(Inner),Output:["_col1"] + Merge Join Operator [MERGEJOIN_1139] (rows=143966864 width=4) + Conds:RS_1433._col0=RS_1387._col0(Inner),Output:["_col1"] <-Map 48 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1385] + SHUFFLE [RS_1387] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1380] + Please refer to the previous Select Operator [SEL_1382] <-Map 65 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1431] + SHUFFLE [RS_1433] PartitionCols:_col0 - Select Operator [SEL_1430] (rows=143966864 width=7) + Select Operator [SEL_1432] (rows=143966864 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_1429] (rows=143966864 width=7) + Filter Operator [FIL_1431] (rows=143966864 width=7) predicate:(ws_sold_date_sk is not null and ws_item_sk is not null and ws_sold_date_sk BETWEEN 
DynamicValue(RS_63_d3_d_date_sk_min) AND DynamicValue(RS_63_d3_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_63_d3_d_date_sk_bloom_filter))) TableScan [TS_53] (rows=144002668 width=7) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk"] <-Reducer 53 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1428] - Group By Operator [GBY_1427] (rows=1 width=12) + BROADCAST [RS_1430] + Group By Operator [GBY_1429] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 48 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1397] - Group By Operator [GBY_1393] (rows=1 width=12) + SHUFFLE [RS_1399] + Group By Operator [GBY_1395] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1386] (rows=1957 width=4) + Select Operator [SEL_1388] (rows=1957 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1380] + Please refer to the previous Select Operator [SEL_1382] <-Reducer 60 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1362] - Select Operator [SEL_1361] (rows=1 width=112) + PARTITION_ONLY_SHUFFLE [RS_1364] + Select Operator [SEL_1363] (rows=1 width=112) Output:["_col0"] - Filter Operator [FIL_1360] (rows=1 width=120) + Filter Operator [FIL_1362] (rows=1 width=120) predicate:(_col0 is not null and _col1 is not null) - Group By Operator [GBY_1359] (rows=1 width=120) + Group By Operator [GBY_1361] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] <-Union 59 [CUSTOM_SIMPLE_EDGE] <-Reducer 58 [CONTAINS] - Reduce Output Operator [RS_1241] - Group By Operator [GBY_1240] (rows=1 width=120) + Reduce Output Operator [RS_1243] + Group By Operator [GBY_1242] (rows=1 width=120) 
Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1239] (rows=980593145 width=112) + Select Operator [SEL_1241] (rows=980593145 width=112) Output:["_col0"] - Select Operator [SEL_1237] (rows=550076554 width=110) + Select Operator [SEL_1239] (rows=550076554 width=110) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1236] (rows=550076554 width=110) - Conds:RS_1446._col0=RS_1389._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_1238] (rows=550076554 width=110) + Conds:RS_1448._col0=RS_1391._col0(Inner),Output:["_col1","_col2"] <-Map 48 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1389] + SHUFFLE [RS_1391] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1380] + Please refer to the previous Select Operator [SEL_1382] <-Map 66 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1446] + SHUFFLE [RS_1448] PartitionCols:_col0 - Select Operator [SEL_1444] (rows=550076554 width=114) + Select Operator [SEL_1446] (rows=550076554 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1443] (rows=550076554 width=114) + Filter Operator [FIL_1445] (rows=550076554 width=114) predicate:(ss_sold_date_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_109_date_dim_d_date_sk_min) AND DynamicValue(RS_109_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_109_date_dim_d_date_sk_bloom_filter))) TableScan [TS_102] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_quantity","ss_list_price"] <-Reducer 57 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1442] - Group By Operator [GBY_1441] (rows=1 width=12) + BROADCAST [RS_1444] + Group By Operator [GBY_1443] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 48 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_1398] - Group By Operator [GBY_1394] (rows=1 
width=12) + SHUFFLE [RS_1400] + Group By Operator [GBY_1396] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1388] (rows=1957 width=4) + Select Operator [SEL_1390] (rows=1957 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1380] + Please refer to the previous Select Operator [SEL_1382] <-Reducer 69 [CONTAINS] - Reduce Output Operator [RS_1259] - Group By Operator [GBY_1258] (rows=1 width=120) + Reduce Output Operator [RS_1261] + Group By Operator [GBY_1260] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1257] (rows=980593145 width=112) + Select Operator [SEL_1259] (rows=980593145 width=112) Output:["_col0"] - Select Operator [SEL_1255] (rows=286549727 width=115) + Select Operator [SEL_1257] (rows=286549727 width=115) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1254] (rows=286549727 width=115) - Conds:RS_1461._col0=RS_1452._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_1256] (rows=286549727 width=115) + Conds:RS_1463._col0=RS_1454._col0(Inner),Output:["_col1","_col2"] <-Map 71 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1452] + PARTITION_ONLY_SHUFFLE [RS_1454] PartitionCols:_col0 - Select Operator [SEL_1449] (rows=1957 width=4) + Select Operator [SEL_1451] (rows=1957 width=4) Output:["_col0"] - Filter Operator [FIL_1448] (rows=1957 width=8) + Filter Operator [FIL_1450] (rows=1957 width=8) predicate:(d_year BETWEEN 1998 AND 2000 and d_date_sk is not null) TableScan [TS_115] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] <-Map 67 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1461] + SHUFFLE [RS_1463] PartitionCols:_col0 - Select Operator [SEL_1459] (rows=286549727 width=119) + Select Operator [SEL_1461] (rows=286549727 width=119) Output:["_col0","_col1","_col2"] - 
Filter Operator [FIL_1458] (rows=286549727 width=119) + Filter Operator [FIL_1460] (rows=286549727 width=119) predicate:(cs_sold_date_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_119_date_dim_d_date_sk_min) AND DynamicValue(RS_119_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_119_date_dim_d_date_sk_bloom_filter))) TableScan [TS_112] (rows=287989836 width=119) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_quantity","cs_list_price"] <-Reducer 72 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1457] - Group By Operator [GBY_1456] (rows=1 width=12) + BROADCAST [RS_1459] + Group By Operator [GBY_1458] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 71 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1455] - Group By Operator [GBY_1454] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_1457] + Group By Operator [GBY_1456] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1451] (rows=1957 width=4) + Select Operator [SEL_1453] (rows=1957 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1449] + Please refer to the previous Select Operator [SEL_1451] <-Reducer 75 [CONTAINS] - Reduce Output Operator [RS_1277] - Group By Operator [GBY_1276] (rows=1 width=120) + Reduce Output Operator [RS_1279] + Group By Operator [GBY_1278] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1275] (rows=980593145 width=112) + Select Operator [SEL_1277] (rows=980593145 width=112) Output:["_col0"] - Select Operator [SEL_1273] (rows=143966864 width=115) + Select Operator [SEL_1275] (rows=143966864 width=115) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1272] (rows=143966864 
width=115) - Conds:RS_1476._col0=RS_1467._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_1274] (rows=143966864 width=115) + Conds:RS_1478._col0=RS_1469._col0(Inner),Output:["_col1","_col2"] <-Map 77 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1467] + PARTITION_ONLY_SHUFFLE [RS_1469] PartitionCols:_col0 - Select Operator [SEL_1464] (rows=1957 width=4) + Select Operator [SEL_1466] (rows=1957 width=4) Output:["_col0"] - Filter Operator [FIL_1463] (rows=1957 width=8) + Filter Operator [FIL_1465] (rows=1957 width=8) predicate:(d_year BETWEEN 1998 AND 2000 and d_date_sk is not null) TableScan [TS_126] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] <-Map 73 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1476] + SHUFFLE [RS_1478] PartitionCols:_col0 - Select Operator [SEL_1474] (rows=143966864 width=119) + Select Operator [SEL_1476] (rows=143966864 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1473] (rows=143966864 width=119) + Filter Operator [FIL_1475] (rows=143966864 width=119) predicate:(ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_130_date_dim_d_date_sk_min) AND DynamicValue(RS_130_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_130_date_dim_d_date_sk_bloom_filter))) TableScan [TS_123] (rows=144002668 width=119) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_quantity","ws_list_price"] <-Reducer 78 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1472] - Group By Operator [GBY_1471] (rows=1 width=12) + BROADCAST [RS_1474] + Group By Operator [GBY_1473] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 77 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1470] - Group By Operator [GBY_1469] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_1472] + Group By Operator 
[GBY_1471] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1466] (rows=1957 width=4) + Select Operator [SEL_1468] (rows=1957 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1464] + Please refer to the previous Select Operator [SEL_1466] <-Reducer 22 [CONTAINS] - Reduce Output Operator [RS_1193] + Reduce Output Operator [RS_1195] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_1192] (rows=304320 width=231) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3, 0L - Top N Key Operator [TNK_1191] (rows=121728 width=221) - keys:_col0, _col1, _col2, _col3, 0L,top n:100 - Select Operator [SEL_1189] (rows=40576 width=219) + Top N Key Operator [TNK_1194] (rows=304320 width=231) + keys:_col0, _col1, _col2, _col3,top n:100 + Group By Operator [GBY_1193] (rows=304320 width=231) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3, 0L + Select Operator [SEL_1191] (rows=40576 width=219) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_1188] (rows=40576 width=244) + Filter Operator [FIL_1190] (rows=40576 width=244) predicate:(_col3 > _col5) - Merge Join Operator [MERGEJOIN_1187] (rows=121728 width=244) + Merge Join Operator [MERGEJOIN_1189] (rows=121728 width=244) Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 21 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1374] - Filter Operator [FIL_1373] (rows=121728 width=132) + PARTITION_ONLY_SHUFFLE [RS_1376] + Filter Operator [FIL_1375] (rows=121728 width=132) predicate:_col3 is not null - Group By Operator [GBY_1372] (rows=121728 width=132) + Group By Operator [GBY_1374] (rows=121728 width=132) 
Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 20 [SIMPLE_EDGE] SHUFFLE [RS_391] @@ -714,189 +714,189 @@ Stage-0 Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col3)","count()"],keys:_col0, _col1, _col2 Select Operator [SEL_388] (rows=143966864 width=127) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_1169] (rows=143966864 width=127) + Merge Join Operator [MERGEJOIN_1171] (rows=143966864 width=127) Conds:RS_385._col1=RS_386._col0(Left Semi),Output:["_col2","_col3","_col6","_col7","_col8"] <-Reducer 19 [SIMPLE_EDGE] SHUFFLE [RS_385] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1156] (rows=143966864 width=131) - Conds:RS_380._col1=RS_1328._col0(Inner),Output:["_col1","_col2","_col3","_col6","_col7","_col8"] + Merge Join Operator [MERGEJOIN_1158] (rows=143966864 width=131) + Conds:RS_380._col1=RS_1330._col0(Inner),Output:["_col1","_col2","_col3","_col6","_col7","_col8"] <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1328] + SHUFFLE [RS_1330] PartitionCols:_col0 - Select Operator [SEL_1319] (rows=462000 width=15) + Select Operator [SEL_1321] (rows=462000 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1310] (rows=462000 width=15) + Filter Operator [FIL_1312] (rows=462000 width=15) predicate:i_item_sk is not null Please refer to the previous TableScan [TS_6] <-Reducer 18 [SIMPLE_EDGE] SHUFFLE [RS_380] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1155] (rows=143966864 width=119) - Conds:RS_1367._col0=RS_1290._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_1157] (rows=143966864 width=119) + Conds:RS_1369._col0=RS_1292._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 10 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1290] + PARTITION_ONLY_SHUFFLE [RS_1292] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1285] + 
Please refer to the previous Select Operator [SEL_1287] <-Map 80 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1367] + SHUFFLE [RS_1369] PartitionCols:_col0 - Select Operator [SEL_1366] (rows=143966864 width=123) + Select Operator [SEL_1368] (rows=143966864 width=123) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1365] (rows=143966864 width=123) + Filter Operator [FIL_1367] (rows=143966864 width=123) predicate:(ws_sold_date_sk is not null and ws_item_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_378_date_dim_d_date_sk_min) AND DynamicValue(RS_378_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_378_date_dim_d_date_sk_bloom_filter))) TableScan [TS_293] (rows=144002668 width=123) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_quantity","ws_list_price"] <-Reducer 23 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1364] - Group By Operator [GBY_1363] (rows=1 width=12) + BROADCAST [RS_1366] + Group By Operator [GBY_1365] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1297] - Group By Operator [GBY_1294] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_1299] + Group By Operator [GBY_1296] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1291] (rows=50 width=4) + Select Operator [SEL_1293] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1285] + Please refer to the previous Select Operator [SEL_1287] <-Reducer 37 [SIMPLE_EDGE] SHUFFLE [RS_386] PartitionCols:_col0 Group By Operator [GBY_384] (rows=364 width=4) Output:["_col0"],keys:_col0 - Merge Join Operator [MERGEJOIN_1163] (rows=729 width=4) - Conds:RS_1329._col1, _col2, _col3=RS_1371._col0, _col1, 
_col2(Inner),Output:["_col0"] + Merge Join Operator [MERGEJOIN_1165] (rows=729 width=4) + Conds:RS_1331._col1, _col2, _col3=RS_1373._col0, _col1, _col2(Inner),Output:["_col0"] <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1329] + SHUFFLE [RS_1331] PartitionCols:_col1, _col2, _col3 - Select Operator [SEL_1320] (rows=458612 width=15) + Select Operator [SEL_1322] (rows=458612 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1311] (rows=458612 width=15) + Filter Operator [FIL_1313] (rows=458612 width=15) predicate:(i_category_id is not null and i_brand_id is not null and i_class_id is not null and i_item_sk is not null) Please refer to the previous TableScan [TS_6] <-Reducer 36 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1371] + SHUFFLE [RS_1373] PartitionCols:_col0, _col1, _col2 - Select Operator [SEL_1370] (rows=1 width=12) + Select Operator [SEL_1372] (rows=1 width=12) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1369] (rows=1 width=20) + Filter Operator [FIL_1371] (rows=1 width=20) predicate:(_col3 = 3L) - Group By Operator [GBY_1368] (rows=120960 width=20) + Group By Operator [GBY_1370] (rows=120960 width=20) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Union 35 [SIMPLE_EDGE] <-Reducer 34 [CONTAINS] vectorized - Reduce Output Operator [RS_1412] + Reduce Output Operator [RS_1414] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1411] (rows=120960 width=20) + Group By Operator [GBY_1413] (rows=120960 width=20) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1410] (rows=120960 width=20) + Group By Operator [GBY_1412] (rows=120960 width=20) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 26 [SIMPLE_EDGE] SHUFFLE [RS_322] PartitionCols:_col0, _col1, _col2 Please refer to the previous Group By Operator [GBY_28] 
<-Reducer 41 [CONTAINS] vectorized - Reduce Output Operator [RS_1426] + Reduce Output Operator [RS_1428] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1425] (rows=120960 width=20) + Group By Operator [GBY_1427] (rows=120960 width=20) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1424] (rows=120960 width=20) + Group By Operator [GBY_1426] (rows=120960 width=20) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 38 [SIMPLE_EDGE] SHUFFLE [RS_342] PartitionCols:_col0, _col1, _col2 Please refer to the previous Group By Operator [GBY_48] <-Reducer 45 [CONTAINS] vectorized - Reduce Output Operator [RS_1440] + Reduce Output Operator [RS_1442] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1439] (rows=120960 width=20) + Group By Operator [GBY_1441] (rows=120960 width=20) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1438] (rows=120960 width=20) + Group By Operator [GBY_1440] (rows=120960 width=20) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 42 [SIMPLE_EDGE] SHUFFLE [RS_363] PartitionCols:_col0, _col1, _col2 Please refer to the previous Group By Operator [GBY_69] <-Reducer 63 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1378] - Select Operator [SEL_1377] (rows=1 width=112) + PARTITION_ONLY_SHUFFLE [RS_1380] + Select Operator [SEL_1379] (rows=1 width=112) Output:["_col0"] - Filter Operator [FIL_1376] (rows=1 width=120) + Filter Operator [FIL_1378] (rows=1 width=120) predicate:(_col0 is not null and _col1 is not null) - Group By Operator [GBY_1375] (rows=1 width=120) + Group By Operator [GBY_1377] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] <-Union 62 [CUSTOM_SIMPLE_EDGE] <-Reducer 61 
[CONTAINS] - Reduce Output Operator [RS_1247] - Group By Operator [GBY_1246] (rows=1 width=120) + Reduce Output Operator [RS_1249] + Group By Operator [GBY_1248] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1245] (rows=980593145 width=112) + Select Operator [SEL_1247] (rows=980593145 width=112) Output:["_col0"] - Select Operator [SEL_1243] (rows=550076554 width=110) + Select Operator [SEL_1245] (rows=550076554 width=110) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1242] (rows=550076554 width=110) - Conds:RS_1447._col0=RS_1390._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_1244] (rows=550076554 width=110) + Conds:RS_1449._col0=RS_1392._col0(Inner),Output:["_col1","_col2"] <-Map 48 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1390] + SHUFFLE [RS_1392] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1380] + Please refer to the previous Select Operator [SEL_1382] <-Map 66 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1447] + SHUFFLE [RS_1449] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1444] + Please refer to the previous Select Operator [SEL_1446] <-Reducer 70 [CONTAINS] - Reduce Output Operator [RS_1265] - Group By Operator [GBY_1264] (rows=1 width=120) + Reduce Output Operator [RS_1267] + Group By Operator [GBY_1266] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1263] (rows=980593145 width=112) + Select Operator [SEL_1265] (rows=980593145 width=112) Output:["_col0"] - Select Operator [SEL_1261] (rows=286549727 width=115) + Select Operator [SEL_1263] (rows=286549727 width=115) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1260] (rows=286549727 width=115) - Conds:RS_1462._col0=RS_1453._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_1262] (rows=286549727 width=115) + Conds:RS_1464._col0=RS_1455._col0(Inner),Output:["_col1","_col2"] 
<-Map 71 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1453] + PARTITION_ONLY_SHUFFLE [RS_1455] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1449] + Please refer to the previous Select Operator [SEL_1451] <-Map 67 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1462] + SHUFFLE [RS_1464] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1459] + Please refer to the previous Select Operator [SEL_1461] <-Reducer 76 [CONTAINS] - Reduce Output Operator [RS_1283] - Group By Operator [GBY_1282] (rows=1 width=120) + Reduce Output Operator [RS_1285] + Group By Operator [GBY_1284] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1281] (rows=980593145 width=112) + Select Operator [SEL_1283] (rows=980593145 width=112) Output:["_col0"] - Select Operator [SEL_1279] (rows=143966864 width=115) + Select Operator [SEL_1281] (rows=143966864 width=115) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1278] (rows=143966864 width=115) - Conds:RS_1477._col0=RS_1468._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_1280] (rows=143966864 width=115) + Conds:RS_1479._col0=RS_1470._col0(Inner),Output:["_col1","_col2"] <-Map 77 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1468] + PARTITION_ONLY_SHUFFLE [RS_1470] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1464] + Please refer to the previous Select Operator [SEL_1466] <-Map 73 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1477] + SHUFFLE [RS_1479] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1474] + Please refer to the previous Select Operator [SEL_1476] <-Reducer 6 [CONTAINS] - Reduce Output Operator [RS_1179] + Reduce Output Operator [RS_1181] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_1178] (rows=304320 width=231) - 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3, 0L - Top N Key Operator [TNK_1177] (rows=121728 width=221) - keys:_col0, _col1, _col2, _col3, 0L,top n:100 - Select Operator [SEL_1175] (rows=40576 width=221) + Top N Key Operator [TNK_1180] (rows=304320 width=231) + keys:_col0, _col1, _col2, _col3,top n:100 + Group By Operator [GBY_1179] (rows=304320 width=231) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3, 0L + Select Operator [SEL_1177] (rows=40576 width=221) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_1174] (rows=40576 width=244) + Filter Operator [FIL_1176] (rows=40576 width=244) predicate:(_col3 > _col5) - Merge Join Operator [MERGEJOIN_1173] (rows=121728 width=244) + Merge Join Operator [MERGEJOIN_1175] (rows=121728 width=244) Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 5 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1336] - Filter Operator [FIL_1335] (rows=121728 width=132) + PARTITION_ONLY_SHUFFLE [RS_1338] + Filter Operator [FIL_1337] (rows=121728 width=132) predicate:_col3 is not null - Group By Operator [GBY_1334] (rows=121728 width=132) + Group By Operator [GBY_1336] (rows=121728 width=132) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_98] @@ -905,61 +905,61 @@ Stage-0 Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col3)","count()"],keys:_col0, _col1, _col2 Select Operator [SEL_95] (rows=550076554 width=122) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_1167] (rows=550076554 width=122) + Merge Join Operator [MERGEJOIN_1169] (rows=550076554 width=122) Conds:RS_92._col1=RS_93._col0(Left 
Semi),Output:["_col2","_col3","_col6","_col7","_col8"] <-Reducer 25 [SIMPLE_EDGE] SHUFFLE [RS_93] PartitionCols:_col0 Group By Operator [GBY_91] (rows=364 width=4) Output:["_col0"],keys:_col0 - Merge Join Operator [MERGEJOIN_1139] (rows=729 width=4) - Conds:RS_1322._col1, _col2, _col3=RS_1333._col0, _col1, _col2(Inner),Output:["_col0"] + Merge Join Operator [MERGEJOIN_1141] (rows=729 width=4) + Conds:RS_1324._col1, _col2, _col3=RS_1335._col0, _col1, _col2(Inner),Output:["_col0"] <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1322] + SHUFFLE [RS_1324] PartitionCols:_col1, _col2, _col3 - Select Operator [SEL_1313] (rows=458612 width=15) + Select Operator [SEL_1315] (rows=458612 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1304] (rows=458612 width=15) + Filter Operator [FIL_1306] (rows=458612 width=15) predicate:(i_category_id is not null and i_brand_id is not null and i_class_id is not null and i_item_sk is not null) Please refer to the previous TableScan [TS_6] <-Reducer 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1333] + SHUFFLE [RS_1335] PartitionCols:_col0, _col1, _col2 - Select Operator [SEL_1332] (rows=1 width=12) + Select Operator [SEL_1334] (rows=1 width=12) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_1331] (rows=1 width=20) + Filter Operator [FIL_1333] (rows=1 width=20) predicate:(_col3 = 3L) - Group By Operator [GBY_1330] (rows=120960 width=20) + Group By Operator [GBY_1332] (rows=120960 width=20) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Union 28 [SIMPLE_EDGE] <-Reducer 27 [CONTAINS] vectorized - Reduce Output Operator [RS_1406] + Reduce Output Operator [RS_1408] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1405] (rows=120960 width=20) + Group By Operator [GBY_1407] (rows=120960 width=20) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1404] (rows=120960 
width=20) + Group By Operator [GBY_1406] (rows=120960 width=20) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 26 [SIMPLE_EDGE] SHUFFLE [RS_29] PartitionCols:_col0, _col1, _col2 Please refer to the previous Group By Operator [GBY_28] <-Reducer 39 [CONTAINS] vectorized - Reduce Output Operator [RS_1420] + Reduce Output Operator [RS_1422] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1419] (rows=120960 width=20) + Group By Operator [GBY_1421] (rows=120960 width=20) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1418] (rows=120960 width=20) + Group By Operator [GBY_1420] (rows=120960 width=20) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 38 [SIMPLE_EDGE] SHUFFLE [RS_49] PartitionCols:_col0, _col1, _col2 Please refer to the previous Group By Operator [GBY_48] <-Reducer 43 [CONTAINS] vectorized - Reduce Output Operator [RS_1434] + Reduce Output Operator [RS_1436] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_1433] (rows=120960 width=20) + Group By Operator [GBY_1435] (rows=120960 width=20) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(_col3)"],keys:_col0, _col1, _col2 - Group By Operator [GBY_1432] (rows=120960 width=20) + Group By Operator [GBY_1434] (rows=120960 width=20) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 42 [SIMPLE_EDGE] SHUFFLE [RS_70] @@ -968,106 +968,106 @@ Stage-0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_92] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1132] (rows=550076554 width=126) - Conds:RS_87._col1=RS_1321._col0(Inner),Output:["_col1","_col2","_col3","_col6","_col7","_col8"] + Merge Join Operator [MERGEJOIN_1134] (rows=550076554 width=126) + 
Conds:RS_87._col1=RS_1323._col0(Inner),Output:["_col1","_col2","_col3","_col6","_col7","_col8"] <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1321] + SHUFFLE [RS_1323] PartitionCols:_col0 - Select Operator [SEL_1312] (rows=462000 width=15) + Select Operator [SEL_1314] (rows=462000 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1303] (rows=462000 width=15) + Filter Operator [FIL_1305] (rows=462000 width=15) predicate:i_item_sk is not null Please refer to the previous TableScan [TS_6] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_87] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_1131] (rows=550076554 width=114) - Conds:RS_1302._col0=RS_1286._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_1133] (rows=550076554 width=114) + Conds:RS_1304._col0=RS_1288._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 10 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1286] + PARTITION_ONLY_SHUFFLE [RS_1288] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1285] + Please refer to the previous Select Operator [SEL_1287] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1302] + SHUFFLE [RS_1304] PartitionCols:_col0 - Select Operator [SEL_1301] (rows=550076554 width=118) + Select Operator [SEL_1303] (rows=550076554 width=118) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_1300] (rows=550076554 width=118) + Filter Operator [FIL_1302] (rows=550076554 width=118) predicate:(ss_sold_date_sk is not null and ss_item_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_85_date_dim_d_date_sk_min) AND DynamicValue(RS_85_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_85_date_dim_d_date_sk_bloom_filter))) TableScan [TS_0] (rows=575995635 width=118) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_quantity","ss_list_price"] <-Reducer 11 [BROADCAST_EDGE] vectorized - BROADCAST [RS_1299] - Group By Operator 
[GBY_1298] (rows=1 width=12) + BROADCAST [RS_1301] + Group By Operator [GBY_1300] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1295] - Group By Operator [GBY_1292] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_1297] + Group By Operator [GBY_1294] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_1287] (rows=50 width=4) + Select Operator [SEL_1289] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_1285] + Please refer to the previous Select Operator [SEL_1287] <-Reducer 56 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1340] - Select Operator [SEL_1339] (rows=1 width=112) + PARTITION_ONLY_SHUFFLE [RS_1342] + Select Operator [SEL_1341] (rows=1 width=112) Output:["_col0"] - Filter Operator [FIL_1338] (rows=1 width=120) + Filter Operator [FIL_1340] (rows=1 width=120) predicate:(_col0 is not null and _col1 is not null) - Group By Operator [GBY_1337] (rows=1 width=120) + Group By Operator [GBY_1339] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] <-Union 55 [CUSTOM_SIMPLE_EDGE] <-Reducer 54 [CONTAINS] - Reduce Output Operator [RS_1235] - Group By Operator [GBY_1234] (rows=1 width=120) + Reduce Output Operator [RS_1237] + Group By Operator [GBY_1236] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1233] (rows=980593145 width=112) + Select Operator [SEL_1235] (rows=980593145 width=112) Output:["_col0"] - Select Operator [SEL_1231] (rows=550076554 width=110) + Select Operator [SEL_1233] (rows=550076554 width=110) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1230] (rows=550076554 width=110) - 
Conds:RS_1445._col0=RS_1387._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_1232] (rows=550076554 width=110) + Conds:RS_1447._col0=RS_1389._col0(Inner),Output:["_col1","_col2"] <-Map 48 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1387] + SHUFFLE [RS_1389] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1380] + Please refer to the previous Select Operator [SEL_1382] <-Map 66 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1445] + SHUFFLE [RS_1447] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1444] + Please refer to the previous Select Operator [SEL_1446] <-Reducer 68 [CONTAINS] - Reduce Output Operator [RS_1253] - Group By Operator [GBY_1252] (rows=1 width=120) + Reduce Output Operator [RS_1255] + Group By Operator [GBY_1254] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1251] (rows=980593145 width=112) + Select Operator [SEL_1253] (rows=980593145 width=112) Output:["_col0"] - Select Operator [SEL_1249] (rows=286549727 width=115) + Select Operator [SEL_1251] (rows=286549727 width=115) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1248] (rows=286549727 width=115) - Conds:RS_1460._col0=RS_1450._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_1250] (rows=286549727 width=115) + Conds:RS_1462._col0=RS_1452._col0(Inner),Output:["_col1","_col2"] <-Map 71 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1450] + PARTITION_ONLY_SHUFFLE [RS_1452] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1449] + Please refer to the previous Select Operator [SEL_1451] <-Map 67 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1460] + SHUFFLE [RS_1462] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1459] + Please refer to the previous Select Operator [SEL_1461] <-Reducer 74 [CONTAINS] - Reduce Output Operator [RS_1271] - Group By Operator [GBY_1270] (rows=1 width=120) + Reduce Output Operator 
[RS_1273] + Group By Operator [GBY_1272] (rows=1 width=120) Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] - Select Operator [SEL_1269] (rows=980593145 width=112) + Select Operator [SEL_1271] (rows=980593145 width=112) Output:["_col0"] - Select Operator [SEL_1267] (rows=143966864 width=115) + Select Operator [SEL_1269] (rows=143966864 width=115) Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_1266] (rows=143966864 width=115) - Conds:RS_1475._col0=RS_1465._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_1268] (rows=143966864 width=115) + Conds:RS_1477._col0=RS_1467._col0(Inner),Output:["_col1","_col2"] <-Map 77 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_1465] + PARTITION_ONLY_SHUFFLE [RS_1467] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1464] + Please refer to the previous Select Operator [SEL_1466] <-Map 73 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_1475] + SHUFFLE [RS_1477] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_1474] + Please refer to the previous Select Operator [SEL_1476] diff --git ql/src/test/results/clientpositive/perf/tez/query15.q.out ql/src/test/results/clientpositive/perf/tez/query15.q.out index 3670a718b3..2edc99a7df 100644 --- ql/src/test/results/clientpositive/perf/tez/query15.q.out +++ ql/src/test/results/clientpositive/perf/tez/query15.q.out @@ -62,83 +62,83 @@ Stage-0 limit:100 Stage-1 Reducer 5 vectorized - File Output Operator [FS_99] - Limit [LIM_98] (rows=100 width=201) + File Output Operator [FS_104] + Limit [LIM_103] (rows=100 width=201) Number of rows:100 - Select Operator [SEL_97] (rows=10141 width=201) + Select Operator [SEL_102] (rows=10141 width=201) Output:["_col0","_col1"] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_96] - Group By Operator [GBY_95] (rows=10141 width=201) + SHUFFLE [RS_101] + Group By Operator [GBY_100] (rows=10141 width=201) 
Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_25] PartitionCols:_col0 Group By Operator [GBY_24] (rows=2403417 width=201) Output:["_col0","_col1"],aggregations:["sum(_col8)"],keys:_col3 - Top N Key Operator [TNK_44] (rows=285117831 width=212) - keys:_col3,top n:100 - Select Operator [SEL_23] (rows=285117831 width=212) - Output:["_col3","_col8"] + Select Operator [SEL_23] (rows=285117831 width=212) + Output:["_col3","_col8"] + Top N Key Operator [TNK_47] (rows=285117831 width=212) + keys:_col3,top n:100 Filter Operator [FIL_22] (rows=285117831 width=212) predicate:(_col9 or _col4 or _col5) - Merge Join Operator [MERGEJOIN_77] (rows=285117831 width=212) + Merge Join Operator [MERGEJOIN_82] (rows=285117831 width=212) Conds:RS_19._col0=RS_20._col1(Inner),Output:["_col3","_col4","_col5","_col8","_col9"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_19] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_75] (rows=80000000 width=101) - Conds:RS_80._col1=RS_83._col0(Inner),Output:["_col0","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_80] (rows=80000000 width=101) + Conds:RS_85._col1=RS_88._col0(Inner),Output:["_col0","_col3","_col4","_col5"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_80] + SHUFFLE [RS_85] PartitionCols:_col1 - Select Operator [SEL_79] (rows=80000000 width=8) + Select Operator [SEL_84] (rows=80000000 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_78] (rows=80000000 width=8) + Filter Operator [FIL_83] (rows=80000000 width=8) predicate:(c_customer_sk is not null and c_current_addr_sk is not null) TableScan [TS_0] (rows=80000000 width=8) default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk"] <-Map 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_83] + SHUFFLE [RS_88] PartitionCols:_col0 - Select Operator [SEL_82] (rows=40000000 width=101) + Select Operator [SEL_87] (rows=40000000 width=101) Output:["_col0","_col1","_col2","_col3"] - Filter 
Operator [FIL_81] (rows=40000000 width=179) + Filter Operator [FIL_86] (rows=40000000 width=179) predicate:ca_address_sk is not null TableScan [TS_3] (rows=40000000 width=179) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state","ca_zip"] <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_20] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_76] (rows=285117831 width=119) - Conds:RS_94._col0=RS_86._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_81] (rows=285117831 width=119) + Conds:RS_99._col0=RS_91._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_86] + SHUFFLE [RS_91] PartitionCols:_col0 - Select Operator [SEL_85] (rows=130 width=4) + Select Operator [SEL_90] (rows=130 width=4) Output:["_col0"] - Filter Operator [FIL_84] (rows=130 width=12) + Filter Operator [FIL_89] (rows=130 width=12) predicate:((d_year = 2000) and (d_qoy = 2) and d_date_sk is not null) TableScan [TS_9] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_94] + SHUFFLE [RS_99] PartitionCols:_col0 - Select Operator [SEL_93] (rows=285117831 width=123) + Select Operator [SEL_98] (rows=285117831 width=123) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_92] (rows=285117831 width=119) + Filter Operator [FIL_97] (rows=285117831 width=119) predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_13_date_dim_d_date_sk_min) AND DynamicValue(RS_13_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_13_date_dim_d_date_sk_bloom_filter))) TableScan [TS_6] (rows=287989836 width=119) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_sales_price"] <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_91] - Group By Operator 
[GBY_90] (rows=1 width=12) + BROADCAST [RS_96] + Group By Operator [GBY_95] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_89] - Group By Operator [GBY_88] (rows=1 width=12) + SHUFFLE [RS_94] + Group By Operator [GBY_93] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_87] (rows=130 width=4) + Select Operator [SEL_92] (rows=130 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_85] + Please refer to the previous Select Operator [SEL_90] diff --git ql/src/test/results/clientpositive/perf/tez/query17.q.out ql/src/test/results/clientpositive/perf/tez/query17.q.out index df70fbc46e..5b99aaac18 100644 --- ql/src/test/results/clientpositive/perf/tez/query17.q.out +++ ql/src/test/results/clientpositive/perf/tez/query17.q.out @@ -122,148 +122,148 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_253] - Limit [LIM_252] (rows=100 width=466) + File Output Operator [FS_258] + Limit [LIM_257] (rows=100 width=466) Number of rows:100 - Select Operator [SEL_251] (rows=97302218447 width=466) + Select Operator [SEL_256] (rows=97302218447 width=466) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_250] - Select Operator [SEL_249] (rows=97302218447 width=466) + SHUFFLE [RS_255] + Select Operator [SEL_254] (rows=97302218447 width=466) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - Group By Operator [GBY_248] (rows=97302218447 width=466) + Group By Operator [GBY_253] (rows=97302218447 width=466) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","count(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)","sum(VALUE._col7)","count(VALUE._col8)","sum(VALUE._col9)","sum(VALUE._col10)","sum(VALUE._col11)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_50] PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_49] (rows=97302218447 width=466) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["count(_col3)","sum(_col3)","sum(_col7)","sum(_col6)","count(_col4)","sum(_col4)","sum(_col9)","sum(_col8)","count(_col5)","sum(_col5)","sum(_col11)","sum(_col10)"],keys:_col0, _col1, _col2 - Top N Key Operator [TNK_93] (rows=97302218447 width=381) - keys:_col0, _col1, _col2,top n:100 - Select Operator [SEL_47] (rows=97302218447 width=381) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - Merge Join Operator [MERGEJOIN_213] (rows=97302218447 width=381) - Conds:RS_44._col3=RS_247._col0(Inner),Output:["_col5","_col8","_col9","_col13","_col19","_col22"] + Select Operator [SEL_47] (rows=97302218447 width=381) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Top N Key Operator [TNK_96] (rows=97302218447 width=381) + keys:_col8, _col9, _col22,top n:100 + Merge Join Operator [MERGEJOIN_218] (rows=97302218447 width=381) + Conds:RS_44._col3=RS_252._col0(Inner),Output:["_col5","_col8","_col9","_col13","_col19","_col22"] <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_247] + SHUFFLE [RS_252] PartitionCols:_col0 - Select Operator [SEL_246] (rows=1704 width=90) + Select Operator [SEL_251] (rows=1704 width=90) Output:["_col0","_col1"] - Filter Operator [FIL_245] 
(rows=1704 width=90) + Filter Operator [FIL_250] (rows=1704 width=90) predicate:s_store_sk is not null TableScan [TS_32] (rows=1704 width=90) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_state"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_44] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_212] (rows=97302218447 width=299) + Merge Join Operator [MERGEJOIN_217] (rows=97302218447 width=299) Conds:RS_41._col1, _col2, _col4=RS_42._col6, _col7, _col8(Inner),Output:["_col3","_col5","_col8","_col9","_col13","_col19"] <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_42] PartitionCols:_col6, _col7, _col8 - Merge Join Operator [MERGEJOIN_211] (rows=10910732684 width=19) + Merge Join Operator [MERGEJOIN_216] (rows=10910732684 width=19) Conds:RS_28._col2, _col1=RS_29._col1, _col2(Inner),Output:["_col3","_col6","_col7","_col8","_col9"] <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_28] PartitionCols:_col2, _col1 - Merge Join Operator [MERGEJOIN_209] (rows=285117831 width=11) - Conds:RS_241._col0=RS_222._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_214] (rows=285117831 width=11) + Conds:RS_246._col0=RS_227._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_222] + PARTITION_ONLY_SHUFFLE [RS_227] PartitionCols:_col0 - Select Operator [SEL_218] (rows=304 width=4) + Select Operator [SEL_223] (rows=304 width=4) Output:["_col0"] - Filter Operator [FIL_215] (rows=304 width=94) + Filter Operator [FIL_220] (rows=304 width=94) predicate:((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=94) default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_quarter_name"] <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_241] + SHUFFLE [RS_246] PartitionCols:_col0 - Select Operator [SEL_240] (rows=285117831 width=15) + Select Operator [SEL_245] (rows=285117831 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator 
[FIL_239] (rows=285117831 width=15) + Filter Operator [FIL_244] (rows=285117831 width=15) predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_26_d3_d_date_sk_min) AND DynamicValue(RS_26_d3_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_26_d3_d_date_sk_bloom_filter))) TableScan [TS_9] (rows=287989836 width=15) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_quantity"] <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_238] - Group By Operator [GBY_237] (rows=1 width=12) + BROADCAST [RS_243] + Group By Operator [GBY_242] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_228] - Group By Operator [GBY_226] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_233] + Group By Operator [GBY_231] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_223] (rows=304 width=4) + Select Operator [SEL_228] (rows=304 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_218] + Please refer to the previous Select Operator [SEL_223] <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_29] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_210] (rows=53632139 width=15) - Conds:RS_244._col0=RS_224._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_215] (rows=53632139 width=15) + Conds:RS_249._col0=RS_229._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_224] + PARTITION_ONLY_SHUFFLE [RS_229] PartitionCols:_col0 - Select Operator [SEL_219] (rows=304 width=4) + Select Operator [SEL_224] (rows=304 
width=4) Output:["_col0"] - Filter Operator [FIL_216] (rows=304 width=94) + Filter Operator [FIL_221] (rows=304 width=94) predicate:((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) Please refer to the previous TableScan [TS_3] <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_244] + SHUFFLE [RS_249] PartitionCols:_col0 - Select Operator [SEL_243] (rows=53632139 width=19) + Select Operator [SEL_248] (rows=53632139 width=19) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_242] (rows=53632139 width=19) + Filter Operator [FIL_247] (rows=53632139 width=19) predicate:(sr_customer_sk is not null and sr_returned_date_sk is not null and sr_item_sk is not null and sr_ticket_number is not null) TableScan [TS_15] (rows=57591150 width=19) default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number","sr_return_quantity"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_41] PartitionCols:_col1, _col2, _col4 - Merge Join Operator [MERGEJOIN_208] (rows=501694138 width=303) - Conds:RS_38._col1=RS_236._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col8","_col9"] + Merge Join Operator [MERGEJOIN_213] (rows=501694138 width=303) + Conds:RS_38._col1=RS_241._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col8","_col9"] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_236] + SHUFFLE [RS_241] PartitionCols:_col0 - Select Operator [SEL_235] (rows=462000 width=288) + Select Operator [SEL_240] (rows=462000 width=288) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_234] (rows=462000 width=288) + Filter Operator [FIL_239] (rows=462000 width=288) predicate:i_item_sk is not null TableScan [TS_6] (rows=462000 width=288) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_item_desc"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_38] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_207] (rows=501694138 width=19) - 
Conds:RS_233._col0=RS_220._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_212] (rows=501694138 width=19) + Conds:RS_238._col0=RS_225._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_220] + PARTITION_ONLY_SHUFFLE [RS_225] PartitionCols:_col0 - Select Operator [SEL_217] (rows=101 width=4) + Select Operator [SEL_222] (rows=101 width=4) Output:["_col0"] - Filter Operator [FIL_214] (rows=101 width=94) + Filter Operator [FIL_219] (rows=101 width=94) predicate:((d_quarter_name = '2000Q1') and d_date_sk is not null) Please refer to the previous TableScan [TS_3] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_233] + SHUFFLE [RS_238] PartitionCols:_col0 - Select Operator [SEL_232] (rows=501694138 width=23) + Select Operator [SEL_237] (rows=501694138 width=23) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_231] (rows=501694138 width=23) + Filter Operator [FIL_236] (rows=501694138 width=23) predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_store_sk is not null and ss_item_sk is not null and ss_ticket_number is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_36_d1_d_date_sk_min) AND DynamicValue(RS_36_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_36_d1_d_date_sk_bloom_filter))) TableScan [TS_0] (rows=575995635 width=23) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_quantity"] <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_230] - Group By Operator [GBY_229] (rows=1 width=12) + BROADCAST [RS_235] + Group By Operator [GBY_234] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_227] - Group 
By Operator [GBY_225] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_232] + Group By Operator [GBY_230] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_221] (rows=101 width=4) + Select Operator [SEL_226] (rows=101 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_217] + Please refer to the previous Select Operator [SEL_222] diff --git ql/src/test/results/clientpositive/perf/tez/query25.q.out ql/src/test/results/clientpositive/perf/tez/query25.q.out index d006795c79..149f395f40 100644 --- ql/src/test/results/clientpositive/perf/tez/query25.q.out +++ ql/src/test/results/clientpositive/perf/tez/query25.q.out @@ -128,144 +128,144 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_253] - Limit [LIM_252] (rows=100 width=808) + File Output Operator [FS_258] + Limit [LIM_257] (rows=100 width=808) Number of rows:100 - Select Operator [SEL_251] (rows=97302218447 width=808) + Select Operator [SEL_256] (rows=97302218447 width=808) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_250] - Group By Operator [GBY_249] (rows=97302218447 width=808) + SHUFFLE [RS_255] + Group By Operator [GBY_254] (rows=97302218447 width=808) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_49] PartitionCols:_col0, _col1, _col2, _col3 Group By Operator [GBY_48] (rows=97302218447 width=808) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col5)","sum(_col16)","sum(_col10)"],keys:_col19, _col20, _col22, _col23 - Top N Key Operator [TNK_95] (rows=97302218447 width=807) + Top N Key Operator [TNK_98] (rows=97302218447 width=807) keys:_col19, _col20, _col22, _col23,top 
n:100 - Merge Join Operator [MERGEJOIN_214] (rows=97302218447 width=807) - Conds:RS_44._col3=RS_248._col0(Inner),Output:["_col5","_col10","_col16","_col19","_col20","_col22","_col23"] + Merge Join Operator [MERGEJOIN_219] (rows=97302218447 width=807) + Conds:RS_44._col3=RS_253._col0(Inner),Output:["_col5","_col10","_col16","_col19","_col20","_col22","_col23"] <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_248] + SHUFFLE [RS_253] PartitionCols:_col0 - Select Operator [SEL_247] (rows=1704 width=192) + Select Operator [SEL_252] (rows=1704 width=192) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_246] (rows=1704 width=192) + Filter Operator [FIL_251] (rows=1704 width=192) predicate:s_store_sk is not null TableScan [TS_32] (rows=1704 width=192) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id","s_store_name"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_44] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_213] (rows=97302218447 width=623) - Conds:RS_41._col1=RS_245._col0(Inner),Output:["_col3","_col5","_col10","_col16","_col19","_col20"] + Merge Join Operator [MERGEJOIN_218] (rows=97302218447 width=623) + Conds:RS_41._col1=RS_250._col0(Inner),Output:["_col3","_col5","_col10","_col16","_col19","_col20"] <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_245] + SHUFFLE [RS_250] PartitionCols:_col0 - Select Operator [SEL_244] (rows=462000 width=288) + Select Operator [SEL_249] (rows=462000 width=288) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_243] (rows=462000 width=288) + Filter Operator [FIL_248] (rows=462000 width=288) predicate:i_item_sk is not null TableScan [TS_29] (rows=462000 width=288) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_item_desc"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_41] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_212] (rows=97302218447 width=343) + Merge Join Operator [MERGEJOIN_217] (rows=97302218447 width=343) Conds:RS_38._col1, _col2, _col4=RS_39._col6, _col7, 
_col8(Inner),Output:["_col1","_col3","_col5","_col10","_col16"] <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_39] PartitionCols:_col6, _col7, _col8 - Merge Join Operator [MERGEJOIN_211] (rows=10910732684 width=235) + Merge Join Operator [MERGEJOIN_216] (rows=10910732684 width=235) Conds:RS_25._col2, _col1=RS_26._col1, _col2(Inner),Output:["_col3","_col6","_col7","_col8","_col9"] <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_25] PartitionCols:_col2, _col1 - Merge Join Operator [MERGEJOIN_209] (rows=285117831 width=119) - Conds:RS_239._col0=RS_223._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_214] (rows=285117831 width=119) + Conds:RS_244._col0=RS_228._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_223] + PARTITION_ONLY_SHUFFLE [RS_228] PartitionCols:_col0 - Select Operator [SEL_219] (rows=351 width=4) + Select Operator [SEL_224] (rows=351 width=4) Output:["_col0"] - Filter Operator [FIL_216] (rows=351 width=12) + Filter Operator [FIL_221] (rows=351 width=12) predicate:((d_year = 2000) and d_moy BETWEEN 4 AND 10 and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=12) default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_239] + SHUFFLE [RS_244] PartitionCols:_col0 - Select Operator [SEL_238] (rows=285117831 width=123) + Select Operator [SEL_243] (rows=285117831 width=123) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_237] (rows=285117831 width=123) + Filter Operator [FIL_242] (rows=285117831 width=123) predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_23_d3_d_date_sk_min) AND DynamicValue(RS_23_d3_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_23_d3_d_date_sk_bloom_filter))) TableScan [TS_6] (rows=287989836 width=123) 
default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_net_profit"] <-Reducer 12 [BROADCAST_EDGE] vectorized - BROADCAST [RS_236] - Group By Operator [GBY_235] (rows=1 width=12) + BROADCAST [RS_241] + Group By Operator [GBY_240] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_229] - Group By Operator [GBY_227] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_234] + Group By Operator [GBY_232] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_224] (rows=351 width=4) + Select Operator [SEL_229] (rows=351 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_219] + Please refer to the previous Select Operator [SEL_224] <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_26] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_210] (rows=53632139 width=119) - Conds:RS_242._col0=RS_225._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_215] (rows=53632139 width=119) + Conds:RS_247._col0=RS_230._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_225] + PARTITION_ONLY_SHUFFLE [RS_230] PartitionCols:_col0 - Select Operator [SEL_220] (rows=351 width=4) + Select Operator [SEL_225] (rows=351 width=4) Output:["_col0"] - Filter Operator [FIL_217] (rows=351 width=12) + Filter Operator [FIL_222] (rows=351 width=12) predicate:((d_year = 2000) and d_moy BETWEEN 4 AND 10 and d_date_sk is not null) Please refer to the previous TableScan [TS_3] <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_242] + SHUFFLE [RS_247] PartitionCols:_col0 - Select Operator [SEL_241] (rows=53632139 width=123) + Select Operator [SEL_246] 
(rows=53632139 width=123) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_240] (rows=53632139 width=123) + Filter Operator [FIL_245] (rows=53632139 width=123) predicate:(sr_customer_sk is not null and sr_returned_date_sk is not null and sr_item_sk is not null and sr_ticket_number is not null) TableScan [TS_12] (rows=57591150 width=123) default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number","sr_net_loss"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_38] PartitionCols:_col1, _col2, _col4 - Merge Join Operator [MERGEJOIN_208] (rows=501694138 width=122) - Conds:RS_234._col0=RS_221._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_213] (rows=501694138 width=122) + Conds:RS_239._col0=RS_226._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_221] + PARTITION_ONLY_SHUFFLE [RS_226] PartitionCols:_col0 - Select Operator [SEL_218] (rows=50 width=4) + Select Operator [SEL_223] (rows=50 width=4) Output:["_col0"] - Filter Operator [FIL_215] (rows=50 width=12) + Filter Operator [FIL_220] (rows=50 width=12) predicate:((d_year = 2000) and (d_moy = 4) and d_date_sk is not null) Please refer to the previous TableScan [TS_3] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_234] + SHUFFLE [RS_239] PartitionCols:_col0 - Select Operator [SEL_233] (rows=501694138 width=126) + Select Operator [SEL_238] (rows=501694138 width=126) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_232] (rows=501694138 width=126) + Filter Operator [FIL_237] (rows=501694138 width=126) predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_store_sk is not null and ss_item_sk is not null and ss_ticket_number is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_36_d1_d_date_sk_min) AND DynamicValue(RS_36_d1_d_date_sk_max) and 
in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_36_d1_d_date_sk_bloom_filter))) TableScan [TS_0] (rows=575995635 width=126) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_net_profit"] <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_231] - Group By Operator [GBY_230] (rows=1 width=12) + BROADCAST [RS_236] + Group By Operator [GBY_235] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_228] - Group By Operator [GBY_226] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_233] + Group By Operator [GBY_231] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_222] (rows=50 width=4) + Select Operator [SEL_227] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_218] + Please refer to the previous Select Operator [SEL_223] diff --git ql/src/test/results/clientpositive/perf/tez/query26.q.out ql/src/test/results/clientpositive/perf/tez/query26.q.out index a1bf3b099b..baf533f2c7 100644 --- ql/src/test/results/clientpositive/perf/tez/query26.q.out +++ ql/src/test/results/clientpositive/perf/tez/query26.q.out @@ -67,95 +67,95 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_125] - Limit [LIM_124] (rows=100 width=444) + File Output Operator [FS_130] + Limit [LIM_129] (rows=100 width=444) Number of rows:100 - Select Operator [SEL_123] (rows=310774 width=444) + Select Operator [SEL_128] (rows=310774 width=444) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_122] - Select Operator [SEL_121] (rows=310774 width=444) + SHUFFLE [RS_127] + Select Operator [SEL_126] (rows=310774 
width=444) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_120] (rows=310774 width=476) + Group By Operator [GBY_125] (rows=310774 width=476) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)","sum(VALUE._col6)","count(VALUE._col7)"],keys:KEY._col0 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_29] PartitionCols:_col0 Group By Operator [GBY_28] (rows=462000 width=476) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(_col4)","count(_col4)","sum(_col5)","count(_col5)","sum(_col7)","count(_col7)","sum(_col6)","count(_col6)"],keys:_col12 - Top N Key Operator [TNK_55] (rows=2317924 width=231) + Top N Key Operator [TNK_58] (rows=2317924 width=231) keys:_col12,top n:100 - Merge Join Operator [MERGEJOIN_99] (rows=2317924 width=231) - Conds:RS_24._col2=RS_119._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col12"] + Merge Join Operator [MERGEJOIN_104] (rows=2317924 width=231) + Conds:RS_24._col2=RS_124._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col12"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_119] + SHUFFLE [RS_124] PartitionCols:_col0 - Select Operator [SEL_118] (rows=462000 width=104) + Select Operator [SEL_123] (rows=462000 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_117] (rows=462000 width=104) + Filter Operator [FIL_122] (rows=462000 width=104) predicate:i_item_sk is not null TableScan [TS_12] (rows=462000 width=104) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_98] (rows=2317924 width=135) - Conds:RS_21._col3=RS_116._col0(Inner),Output:["_col2","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_103] (rows=2317924 width=135) + 
Conds:RS_21._col3=RS_121._col0(Inner),Output:["_col2","_col4","_col5","_col6","_col7"] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_116] + SHUFFLE [RS_121] PartitionCols:_col0 - Select Operator [SEL_115] (rows=2300 width=4) + Select Operator [SEL_120] (rows=2300 width=4) Output:["_col0"] - Filter Operator [FIL_114] (rows=2300 width=174) + Filter Operator [FIL_119] (rows=2300 width=174) predicate:(((p_channel_email = 'N') or (p_channel_event = 'N')) and p_promo_sk is not null) TableScan [TS_9] (rows=2300 width=174) default@promotion,promotion,Tbl:COMPLETE,Col:COMPLETE,Output:["p_promo_sk","p_channel_email","p_channel_event"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_97] (rows=2317924 width=137) - Conds:RS_18._col0=RS_113._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_102] (rows=2317924 width=137) + Conds:RS_18._col0=RS_118._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6","_col7"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_113] + SHUFFLE [RS_118] PartitionCols:_col0 - Select Operator [SEL_112] (rows=652 width=4) + Select Operator [SEL_117] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_111] (rows=652 width=8) + Filter Operator [FIL_116] (rows=652 width=8) predicate:((d_year = 1998) and d_date_sk is not null) TableScan [TS_6] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_96] (rows=2317924 width=139) - Conds:RS_110._col1=RS_102._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_101] (rows=2317924 width=139) + Conds:RS_115._col1=RS_107._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7"] <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_102] + PARTITION_ONLY_SHUFFLE [RS_107] 
PartitionCols:_col0 - Select Operator [SEL_101] (rows=14776 width=4) + Select Operator [SEL_106] (rows=14776 width=4) Output:["_col0"] - Filter Operator [FIL_100] (rows=14776 width=268) + Filter Operator [FIL_105] (rows=14776 width=268) predicate:((cd_marital_status = 'W') and (cd_education_status = 'Primary') and (cd_gender = 'F') and cd_demo_sk is not null) TableScan [TS_3] (rows=1861800 width=268) default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_110] + SHUFFLE [RS_115] PartitionCols:_col1 - Select Operator [SEL_109] (rows=283691050 width=354) + Select Operator [SEL_114] (rows=283691050 width=354) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_108] (rows=283691050 width=354) + Filter Operator [FIL_113] (rows=283691050 width=354) predicate:(cs_promo_sk is not null and cs_sold_date_sk is not null and cs_bill_cdemo_sk is not null and cs_item_sk is not null and cs_bill_cdemo_sk BETWEEN DynamicValue(RS_16_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_16_customer_demographics_cd_demo_sk_max) and in_bloom_filter(cs_bill_cdemo_sk, DynamicValue(RS_16_customer_demographics_cd_demo_sk_bloom_filter))) TableScan [TS_0] (rows=287989836 width=354) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_cdemo_sk","cs_item_sk","cs_promo_sk","cs_quantity","cs_list_price","cs_sales_price","cs_coupon_amt"] <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_107] - Group By Operator [GBY_106] (rows=1 width=12) + BROADCAST [RS_112] + Group By Operator [GBY_111] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_105] - Group By Operator [GBY_104] (rows=1 
width=12) + PARTITION_ONLY_SHUFFLE [RS_110] + Group By Operator [GBY_109] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_103] (rows=14776 width=4) + Select Operator [SEL_108] (rows=14776 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_101] + Please refer to the previous Select Operator [SEL_106] diff --git ql/src/test/results/clientpositive/perf/tez/query27.q.out ql/src/test/results/clientpositive/perf/tez/query27.q.out index 6f49de2344..774c0fd192 100644 --- ql/src/test/results/clientpositive/perf/tez/query27.q.out +++ ql/src/test/results/clientpositive/perf/tez/query27.q.out @@ -71,97 +71,97 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_126] - Limit [LIM_125] (rows=100 width=538) + File Output Operator [FS_128] + Limit [LIM_127] (rows=100 width=538) Number of rows:100 - Select Operator [SEL_124] (rows=6526254 width=538) + Select Operator [SEL_126] (rows=6526254 width=538) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_123] - Select Operator [SEL_122] (rows=6526254 width=538) + SHUFFLE [RS_125] + Select Operator [SEL_124] (rows=6526254 width=538) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Group By Operator [GBY_121] (rows=6526254 width=570) + Group By Operator [GBY_123] (rows=6526254 width=570) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)","sum(VALUE._col6)","count(VALUE._col7)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_30] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_29] (rows=13907934 width=570) - 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(_col2)","count(_col2)","sum(_col3)","count(_col3)","sum(_col4)","count(_col4)","sum(_col5)","count(_col5)"],keys:_col0, _col1, 0L - Top N Key Operator [TNK_56] (rows=4635978 width=186) - keys:_col0, _col1, 0L,top n:100 + Top N Key Operator [TNK_58] (rows=13907934 width=570) + keys:_col0, _col1,top n:100 + Group By Operator [GBY_29] (rows=13907934 width=570) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(_col2)","count(_col2)","sum(_col3)","count(_col3)","sum(_col4)","count(_col4)","sum(_col5)","count(_col5)"],keys:_col0, _col1, 0L Select Operator [SEL_27] (rows=4635978 width=186) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_100] (rows=4635978 width=186) - Conds:RS_24._col1=RS_120._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col11","_col13"] + Merge Join Operator [MERGEJOIN_102] (rows=4635978 width=186) + Conds:RS_24._col1=RS_122._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col11","_col13"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_120] + SHUFFLE [RS_122] PartitionCols:_col0 - Select Operator [SEL_119] (rows=462000 width=104) + Select Operator [SEL_121] (rows=462000 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_118] (rows=462000 width=104) + Filter Operator [FIL_120] (rows=462000 width=104) predicate:i_item_sk is not null TableScan [TS_12] (rows=462000 width=104) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_99] (rows=4635978 width=90) - Conds:RS_21._col3=RS_117._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col11"] + Merge Join Operator [MERGEJOIN_101] (rows=4635978 width=90) + 
Conds:RS_21._col3=RS_119._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col11"] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_117] + SHUFFLE [RS_119] PartitionCols:_col0 - Select Operator [SEL_116] (rows=209 width=90) + Select Operator [SEL_118] (rows=209 width=90) Output:["_col0","_col1"] - Filter Operator [FIL_115] (rows=209 width=90) + Filter Operator [FIL_117] (rows=209 width=90) predicate:((s_state) IN ('SD', 'FL', 'MI', 'LA', 'MO', 'SC') and s_store_sk is not null) TableScan [TS_9] (rows=1704 width=90) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_state"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_98] (rows=4635978 width=4) - Conds:RS_18._col0=RS_114._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_100] (rows=4635978 width=4) + Conds:RS_18._col0=RS_116._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_114] + SHUFFLE [RS_116] PartitionCols:_col0 - Select Operator [SEL_113] (rows=652 width=4) + Select Operator [SEL_115] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_112] (rows=652 width=8) + Filter Operator [FIL_114] (rows=652 width=8) predicate:((d_year = 2001) and d_date_sk is not null) TableScan [TS_6] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_97] (rows=4635978 width=4) - Conds:RS_111._col2=RS_103._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_99] (rows=4635978 width=4) + Conds:RS_113._col2=RS_105._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7"] <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_103] + PARTITION_ONLY_SHUFFLE [RS_105] PartitionCols:_col0 - Select Operator 
[SEL_102] (rows=14776 width=4) + Select Operator [SEL_104] (rows=14776 width=4) Output:["_col0"] - Filter Operator [FIL_101] (rows=14776 width=268) + Filter Operator [FIL_103] (rows=14776 width=268) predicate:((cd_marital_status = 'U') and (cd_education_status = '2 yr Degree') and (cd_gender = 'M') and cd_demo_sk is not null) TableScan [TS_3] (rows=1861800 width=268) default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_111] + SHUFFLE [RS_113] PartitionCols:_col2 - Select Operator [SEL_110] (rows=501690006 width=340) + Select Operator [SEL_112] (rows=501690006 width=340) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_109] (rows=501690006 width=340) + Filter Operator [FIL_111] (rows=501690006 width=340) predicate:(ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_item_sk is not null and ss_cdemo_sk BETWEEN DynamicValue(RS_16_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_16_customer_demographics_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_16_customer_demographics_cd_demo_sk_bloom_filter))) TableScan [TS_0] (rows=575995635 width=340) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_cdemo_sk","ss_store_sk","ss_quantity","ss_list_price","ss_sales_price","ss_coupon_amt"] <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_108] - Group By Operator [GBY_107] (rows=1 width=12) + BROADCAST [RS_110] + Group By Operator [GBY_109] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_106] - Group By Operator [GBY_105] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_108] + Group By Operator 
[GBY_107] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_104] (rows=14776 width=4) + Select Operator [SEL_106] (rows=14776 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_102] + Please refer to the previous Select Operator [SEL_104] diff --git ql/src/test/results/clientpositive/perf/tez/query29.q.out ql/src/test/results/clientpositive/perf/tez/query29.q.out index 5066893829..6be8366385 100644 --- ql/src/test/results/clientpositive/perf/tez/query29.q.out +++ ql/src/test/results/clientpositive/perf/tez/query29.q.out @@ -126,147 +126,147 @@ Stage-0 limit:100 Stage-1 Reducer 5 vectorized - File Output Operator [FS_244] - Limit [LIM_243] (rows=100 width=496) + File Output Operator [FS_249] + Limit [LIM_248] (rows=100 width=496) Number of rows:100 - Select Operator [SEL_242] (rows=478292911 width=496) + Select Operator [SEL_247] (rows=478292911 width=496) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_241] - Group By Operator [GBY_240] (rows=478292911 width=496) + SHUFFLE [RS_246] + Group By Operator [GBY_245] (rows=478292911 width=496) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_49] PartitionCols:_col0, _col1, _col2, _col3 Group By Operator [GBY_48] (rows=478292911 width=496) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col13)","sum(_col19)","sum(_col3)"],keys:_col6, _col7, _col22, _col23 - Top N Key Operator [TNK_93] (rows=97302218301 width=483) + Top N Key Operator [TNK_96] (rows=97302218301 width=483) keys:_col6, _col7, _col22, _col23,top n:100 - Merge Join Operator [MERGEJOIN_205] (rows=97302218301 width=483) + Merge Join Operator 
[MERGEJOIN_210] (rows=97302218301 width=483) Conds:RS_44._col2, _col1=RS_45._col11, _col12(Inner),Output:["_col3","_col6","_col7","_col13","_col19","_col22","_col23"] <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_45] PartitionCols:_col11, _col12 Select Operator [SEL_40] (rows=478292911 width=487) Output:["_col1","_col2","_col8","_col11","_col12","_col14","_col17","_col18"] - Merge Join Operator [MERGEJOIN_204] (rows=478292911 width=487) - Conds:RS_37._col3=RS_239._col0(Inner),Output:["_col5","_col8","_col9","_col11","_col14","_col15","_col17","_col18"] + Merge Join Operator [MERGEJOIN_209] (rows=478292911 width=487) + Conds:RS_37._col3=RS_244._col0(Inner),Output:["_col5","_col8","_col9","_col11","_col14","_col15","_col17","_col18"] <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_239] + SHUFFLE [RS_244] PartitionCols:_col0 - Select Operator [SEL_238] (rows=1704 width=192) + Select Operator [SEL_243] (rows=1704 width=192) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_237] (rows=1704 width=192) + Filter Operator [FIL_242] (rows=1704 width=192) predicate:s_store_sk is not null TableScan [TS_25] (rows=1704 width=192) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id","s_store_name"] <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_37] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_203] (rows=478292911 width=303) - Conds:RS_34._col1=RS_236._col0(Inner),Output:["_col3","_col5","_col8","_col9","_col11","_col14","_col15"] + Merge Join Operator [MERGEJOIN_208] (rows=478292911 width=303) + Conds:RS_34._col1=RS_241._col0(Inner),Output:["_col3","_col5","_col8","_col9","_col11","_col14","_col15"] <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_236] + SHUFFLE [RS_241] PartitionCols:_col0 - Select Operator [SEL_235] (rows=462000 width=288) + Select Operator [SEL_240] (rows=462000 width=288) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_234] (rows=462000 width=288) + Filter Operator [FIL_239] (rows=462000 width=288) predicate:i_item_sk is not 
null TableScan [TS_22] (rows=462000 width=288) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_item_desc"] <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_34] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_202] (rows=478292911 width=23) + Merge Join Operator [MERGEJOIN_207] (rows=478292911 width=23) Conds:RS_31._col1, _col2, _col4=RS_32._col1, _col2, _col3(Inner),Output:["_col1","_col3","_col5","_col8","_col9","_col11"] <-Reducer 15 [SIMPLE_EDGE] SHUFFLE [RS_32] PartitionCols:_col1, _col2, _col3 - Merge Join Operator [MERGEJOIN_201] (rows=53632139 width=15) - Conds:RS_233._col0=RS_223._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_206] (rows=53632139 width=15) + Conds:RS_238._col0=RS_228._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_223] + SHUFFLE [RS_228] PartitionCols:_col0 - Select Operator [SEL_220] (rows=201 width=4) + Select Operator [SEL_225] (rows=201 width=4) Output:["_col0"] - Filter Operator [FIL_218] (rows=201 width=12) + Filter Operator [FIL_223] (rows=201 width=12) predicate:((d_year = 1999) and d_moy BETWEEN 4 AND 7 and d_date_sk is not null) TableScan [TS_9] (rows=73049 width=12) default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_233] + SHUFFLE [RS_238] PartitionCols:_col0 - Select Operator [SEL_232] (rows=53632139 width=19) + Select Operator [SEL_237] (rows=53632139 width=19) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_231] (rows=53632139 width=19) + Filter Operator [FIL_236] (rows=53632139 width=19) predicate:(sr_customer_sk is not null and sr_returned_date_sk is not null and sr_item_sk is not null and sr_ticket_number is not null) TableScan [TS_12] (rows=57591150 width=19) 
default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number","sr_return_quantity"] <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_31] PartitionCols:_col1, _col2, _col4 - Merge Join Operator [MERGEJOIN_200] (rows=501694138 width=19) - Conds:RS_230._col0=RS_221._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_205] (rows=501694138 width=19) + Conds:RS_235._col0=RS_226._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_221] + SHUFFLE [RS_226] PartitionCols:_col0 - Select Operator [SEL_219] (rows=50 width=4) + Select Operator [SEL_224] (rows=50 width=4) Output:["_col0"] - Filter Operator [FIL_217] (rows=50 width=12) + Filter Operator [FIL_222] (rows=50 width=12) predicate:((d_year = 1999) and (d_moy = 4) and d_date_sk is not null) Please refer to the previous TableScan [TS_9] <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_230] + SHUFFLE [RS_235] PartitionCols:_col0 - Select Operator [SEL_229] (rows=501694138 width=23) + Select Operator [SEL_234] (rows=501694138 width=23) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_228] (rows=501694138 width=23) + Filter Operator [FIL_233] (rows=501694138 width=23) predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_store_sk is not null and ss_item_sk is not null and ss_ticket_number is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_29_d1_d_date_sk_min) AND DynamicValue(RS_29_d1_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_29_d1_d_date_sk_bloom_filter))) TableScan [TS_6] (rows=575995635 width=23) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_quantity"] <-Reducer 14 [BROADCAST_EDGE] vectorized - BROADCAST [RS_227] - Group By Operator [GBY_226] (rows=1 width=12) + BROADCAST 
[RS_232] + Group By Operator [GBY_231] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 13 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_225] - Group By Operator [GBY_224] (rows=1 width=12) + SHUFFLE [RS_230] + Group By Operator [GBY_229] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_222] (rows=50 width=4) + Select Operator [SEL_227] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_219] + Please refer to the previous Select Operator [SEL_224] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_44] PartitionCols:_col2, _col1 - Merge Join Operator [MERGEJOIN_199] (rows=285117831 width=11) - Conds:RS_216._col0=RS_208._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_204] (rows=285117831 width=11) + Conds:RS_221._col0=RS_213._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 6 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_208] + PARTITION_ONLY_SHUFFLE [RS_213] PartitionCols:_col0 - Select Operator [SEL_207] (rows=1957 width=4) + Select Operator [SEL_212] (rows=1957 width=4) Output:["_col0"] - Filter Operator [FIL_206] (rows=1957 width=8) + Filter Operator [FIL_211] (rows=1957 width=8) predicate:((d_year) IN (1999, 2000, 2001) and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=8) default@date_dim,d3,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_216] + SHUFFLE [RS_221] PartitionCols:_col0 - Select Operator [SEL_215] (rows=285117831 width=15) + Select Operator [SEL_220] (rows=285117831 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_214] (rows=285117831 width=15) + Filter Operator [FIL_219] (rows=285117831 width=15) predicate:(cs_sold_date_sk is not null and cs_bill_customer_sk is not null and 
cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_42_d3_d_date_sk_min) AND DynamicValue(RS_42_d3_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_42_d3_d_date_sk_bloom_filter))) TableScan [TS_0] (rows=287989836 width=15) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_quantity"] <-Reducer 7 [BROADCAST_EDGE] vectorized - BROADCAST [RS_213] - Group By Operator [GBY_212] (rows=1 width=12) + BROADCAST [RS_218] + Group By Operator [GBY_217] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_211] - Group By Operator [GBY_210] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_216] + Group By Operator [GBY_215] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_209] (rows=1957 width=4) + Select Operator [SEL_214] (rows=1957 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_207] + Please refer to the previous Select Operator [SEL_212] diff --git ql/src/test/results/clientpositive/perf/tez/query35.q.out ql/src/test/results/clientpositive/perf/tez/query35.q.out index 265c51bb72..74e539fd90 100644 --- ql/src/test/results/clientpositive/perf/tez/query35.q.out +++ ql/src/test/results/clientpositive/perf/tez/query35.q.out @@ -153,82 +153,82 @@ Stage-0 limit:-1 Stage-1 Reducer 8 vectorized - File Output Operator [FS_229] - Limit [LIM_228] (rows=1 width=352) + File Output Operator [FS_234] + Limit [LIM_233] (rows=1 width=352) Number of rows:100 - Select Operator [SEL_227] (rows=1 width=352) + Select Operator [SEL_232] (rows=1 width=352) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16"] <-Reducer 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_226] - Select Operator [SEL_225] (rows=1 width=352) + SHUFFLE [RS_231] + Select Operator [SEL_230] (rows=1 width=352) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col12","_col14","_col15","_col16","_col17"] - Group By Operator [GBY_224] (rows=1 width=336) + Group By Operator [GBY_229] (rows=1 width=336) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","count(VALUE._col2)","max(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)","max(VALUE._col6)","sum(VALUE._col7)","count(VALUE._col8)","max(VALUE._col9)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_67] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 Group By Operator [GBY_66] (rows=2 width=336) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"],aggregations:["count()","sum(_col8)","count(_col8)","max(_col8)","sum(_col9)","count(_col9)","max(_col9)","sum(_col10)","count(_col10)","max(_col10)"],keys:_col4, _col6, _col7, _col8, _col9, _col10 - Top N Key Operator [TNK_104] (rows=1401496 width=276) - keys:_col4, _col6, _col7, _col8, _col9, _col10,top n:100 - Select Operator [SEL_65] (rows=1401496 width=276) - Output:["_col4","_col6","_col7","_col8","_col9","_col10"] + Select Operator [SEL_65] (rows=1401496 width=276) + Output:["_col4","_col6","_col7","_col8","_col9","_col10"] + Top N Key Operator [TNK_107] (rows=1401496 width=276) + keys:_col4, _col6, _col7, _col8, _col9, _col10,top n:100 Filter Operator [FIL_64] (rows=1401496 width=276) predicate:(_col11 is 
not null or _col13 is not null) - Merge Join Operator [MERGEJOIN_182] (rows=1401496 width=276) - Conds:RS_61._col0=RS_223._col1(Left Outer),Output:["_col4","_col6","_col7","_col8","_col9","_col10","_col11","_col13"] + Merge Join Operator [MERGEJOIN_187] (rows=1401496 width=276) + Conds:RS_61._col0=RS_228._col1(Left Outer),Output:["_col4","_col6","_col7","_col8","_col9","_col10","_col11","_col13"] <-Reducer 5 [SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_61] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_181] (rows=1414922 width=276) - Conds:RS_58._col0=RS_215._col1(Left Outer),Output:["_col0","_col4","_col6","_col7","_col8","_col9","_col10","_col11"] + Merge Join Operator [MERGEJOIN_186] (rows=1414922 width=276) + Conds:RS_58._col0=RS_220._col1(Left Outer),Output:["_col0","_col4","_col6","_col7","_col8","_col9","_col10","_col11"] <-Reducer 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_215] + SHUFFLE [RS_220] PartitionCols:_col1 - Select Operator [SEL_214] (rows=1414922 width=7) + Select Operator [SEL_219] (rows=1414922 width=7) Output:["_col0","_col1"] - Group By Operator [GBY_213] (rows=1414922 width=3) + Group By Operator [GBY_218] (rows=1414922 width=3) Output:["_col0"],keys:KEY._col0 <-Reducer 16 [SIMPLE_EDGE] SHUFFLE [RS_30] PartitionCols:_col0 Group By Operator [GBY_29] (rows=143930993 width=3) Output:["_col0"],keys:_col1 - Merge Join Operator [MERGEJOIN_178] (rows=143930993 width=3) - Conds:RS_212._col0=RS_196._col0(Inner),Output:["_col1"] + Merge Join Operator [MERGEJOIN_183] (rows=143930993 width=3) + Conds:RS_217._col0=RS_201._col0(Inner),Output:["_col1"] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_196] + SHUFFLE [RS_201] PartitionCols:_col0 - Select Operator [SEL_193] (rows=652 width=4) + Select Operator [SEL_198] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_192] (rows=652 width=12) + Filter Operator [FIL_197] (rows=652 width=12) predicate:((d_year = 1999) and (d_qoy < 4) and d_date_sk is not null) TableScan [TS_12] (rows=73049 width=12) 
default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_212] + SHUFFLE [RS_217] PartitionCols:_col0 - Select Operator [SEL_211] (rows=143930993 width=7) + Select Operator [SEL_216] (rows=143930993 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_210] (rows=143930993 width=7) + Filter Operator [FIL_215] (rows=143930993 width=7) predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_26_date_dim_d_date_sk_min) AND DynamicValue(RS_26_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_26_date_dim_d_date_sk_bloom_filter))) TableScan [TS_19] (rows=144002668 width=7) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] <-Reducer 18 [BROADCAST_EDGE] vectorized - BROADCAST [RS_209] - Group By Operator [GBY_208] (rows=1 width=12) + BROADCAST [RS_214] + Group By Operator [GBY_213] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_202] - Group By Operator [GBY_200] (rows=1 width=12) + SHUFFLE [RS_207] + Group By Operator [GBY_205] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_197] (rows=652 width=4) + Select Operator [SEL_202] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_193] + Please refer to the previous Select Operator [SEL_198] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_58] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_180] (rows=525327388 width=272) + Merge Join Operator [MERGEJOIN_185] (rows=525327388 width=272) Conds:RS_55._col0=RS_56._col0(Left Semi),Output:["_col0","_col4","_col6","_col7","_col8","_col9","_col10"] <-Reducer 13 
[SIMPLE_EDGE] SHUFFLE [RS_56] @@ -237,105 +237,105 @@ Stage-0 Output:["_col0"],keys:_col0 Select Operator [SEL_18] (rows=525327388 width=3) Output:["_col0"] - Merge Join Operator [MERGEJOIN_177] (rows=525327388 width=3) - Conds:RS_207._col0=RS_194._col0(Inner),Output:["_col1"] + Merge Join Operator [MERGEJOIN_182] (rows=525327388 width=3) + Conds:RS_212._col0=RS_199._col0(Inner),Output:["_col1"] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_194] + SHUFFLE [RS_199] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_193] + Please refer to the previous Select Operator [SEL_198] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_207] + SHUFFLE [RS_212] PartitionCols:_col0 - Select Operator [SEL_206] (rows=525327388 width=7) + Select Operator [SEL_211] (rows=525327388 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_205] (rows=525327388 width=7) + Filter Operator [FIL_210] (rows=525327388 width=7) predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) TableScan [TS_9] (rows=575995635 width=7) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk"] <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_204] - Group By Operator [GBY_203] (rows=1 width=12) + BROADCAST [RS_209] + Group By Operator [GBY_208] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_201] - Group By Operator [GBY_199] (rows=1 width=12) + SHUFFLE [RS_206] + Group By Operator [GBY_204] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_195] 
(rows=652 width=4) + Select Operator [SEL_200] (rows=652 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_193] + Please refer to the previous Select Operator [SEL_198] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_55] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_176] (rows=78293105 width=272) - Conds:RS_50._col1=RS_191._col0(Inner),Output:["_col0","_col4","_col6","_col7","_col8","_col9","_col10"] + Merge Join Operator [MERGEJOIN_181] (rows=78293105 width=272) + Conds:RS_50._col1=RS_196._col0(Inner),Output:["_col0","_col4","_col6","_col7","_col8","_col9","_col10"] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_191] + SHUFFLE [RS_196] PartitionCols:_col0 - Select Operator [SEL_190] (rows=1861800 width=186) + Select Operator [SEL_195] (rows=1861800 width=186) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_189] (rows=1861800 width=186) + Filter Operator [FIL_194] (rows=1861800 width=186) predicate:cd_demo_sk is not null TableScan [TS_6] (rows=1861800 width=186) default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_dep_count","cd_dep_employed_count","cd_dep_college_count"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_50] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_175] (rows=77201384 width=93) - Conds:RS_185._col2=RS_188._col0(Inner),Output:["_col0","_col1","_col4"] + Merge Join Operator [MERGEJOIN_180] (rows=77201384 width=93) + Conds:RS_190._col2=RS_193._col0(Inner),Output:["_col0","_col1","_col4"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_185] + SHUFFLE [RS_190] PartitionCols:_col2 - Select Operator [SEL_184] (rows=77201384 width=11) + Select Operator [SEL_189] (rows=77201384 width=11) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_183] (rows=77201384 width=11) + Filter Operator [FIL_188] (rows=77201384 width=11) predicate:(c_current_cdemo_sk is not null and c_current_addr_sk is not null and c_customer_sk is 
not null) TableScan [TS_0] (rows=80000000 width=11) default@customer,c,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_188] + SHUFFLE [RS_193] PartitionCols:_col0 - Select Operator [SEL_187] (rows=40000000 width=90) + Select Operator [SEL_192] (rows=40000000 width=90) Output:["_col0","_col1"] - Filter Operator [FIL_186] (rows=40000000 width=90) + Filter Operator [FIL_191] (rows=40000000 width=90) predicate:ca_address_sk is not null TableScan [TS_3] (rows=40000000 width=90) default@customer_address,ca,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] <-Reducer 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_223] + SHUFFLE [RS_228] PartitionCols:_col1 - Select Operator [SEL_222] (rows=1401496 width=7) + Select Operator [SEL_227] (rows=1401496 width=7) Output:["_col0","_col1"] - Group By Operator [GBY_221] (rows=1401496 width=3) + Group By Operator [GBY_226] (rows=1401496 width=3) Output:["_col0"],keys:KEY._col0 <-Reducer 19 [SIMPLE_EDGE] SHUFFLE [RS_44] PartitionCols:_col0 Group By Operator [GBY_43] (rows=285115246 width=3) Output:["_col0"],keys:_col1 - Merge Join Operator [MERGEJOIN_179] (rows=285115246 width=3) - Conds:RS_220._col0=RS_198._col0(Inner),Output:["_col1"] + Merge Join Operator [MERGEJOIN_184] (rows=285115246 width=3) + Conds:RS_225._col0=RS_203._col0(Inner),Output:["_col1"] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_198] + SHUFFLE [RS_203] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_193] + Please refer to the previous Select Operator [SEL_198] <-Map 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_220] + SHUFFLE [RS_225] PartitionCols:_col0 - Select Operator [SEL_219] (rows=285115246 width=7) + Select Operator [SEL_224] (rows=285115246 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_218] (rows=285115246 width=7) + Filter Operator [FIL_223] (rows=285115246 width=7) predicate:(cs_ship_customer_sk is not null and 
cs_sold_date_sk is not null and cs_ship_customer_sk BETWEEN DynamicValue(RS_61_c_c_customer_sk_min) AND DynamicValue(RS_61_c_c_customer_sk_max) and in_bloom_filter(cs_ship_customer_sk, DynamicValue(RS_61_c_c_customer_sk_bloom_filter))) TableScan [TS_33] (rows=287989836 width=7) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_ship_customer_sk"] <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_217] - Group By Operator [GBY_216] (rows=1 width=12) + BROADCAST [RS_222] + Group By Operator [GBY_221] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Reducer 5 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_167] - Group By Operator [GBY_166] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_172] + Group By Operator [GBY_171] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_165] (rows=1414922 width=4) + Select Operator [SEL_170] (rows=1414922 width=4) Output:["_col0"] - Please refer to the previous Merge Join Operator [MERGEJOIN_181] + Please refer to the previous Merge Join Operator [MERGEJOIN_186] diff --git ql/src/test/results/clientpositive/perf/tez/query37.q.out ql/src/test/results/clientpositive/perf/tez/query37.q.out index 2724fd44dc..3b085cbf2f 100644 --- ql/src/test/results/clientpositive/perf/tez/query37.q.out +++ ql/src/test/results/clientpositive/perf/tez/query37.q.out @@ -56,78 +56,78 @@ Stage-0 limit:100 Stage-1 Reducer 5 vectorized - File Output Operator [FS_101] - Limit [LIM_100] (rows=4 width=396) + File Output Operator [FS_106] + Limit [LIM_105] (rows=4 width=396) Number of rows:100 - Select Operator [SEL_99] (rows=4 width=396) + Select Operator [SEL_104] (rows=4 width=396) Output:["_col0","_col1","_col2"] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_98] - Group By Operator [GBY_97] 
(rows=4 width=396) + SHUFFLE [RS_103] + Group By Operator [GBY_102] (rows=4 width=396) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_23] (rows=8 width=396) Output:["_col0","_col1","_col2"],keys:_col2, _col3, _col4 - Top N Key Operator [TNK_43] (rows=11627 width=396) + Top N Key Operator [TNK_48] (rows=11627 width=396) keys:_col2, _col3, _col4,top n:100 - Merge Join Operator [MERGEJOIN_79] (rows=11627 width=396) + Merge Join Operator [MERGEJOIN_84] (rows=11627 width=396) Conds:RS_19._col1=RS_20._col1(Inner),Output:["_col2","_col3","_col4"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_19] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_77] (rows=1781971 width=400) - Conds:RS_90._col0=RS_82._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_82] (rows=1781971 width=400) + Conds:RS_95._col0=RS_87._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 6 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_82] + PARTITION_ONLY_SHUFFLE [RS_87] PartitionCols:_col0 - Select Operator [SEL_81] (rows=297 width=400) + Select Operator [SEL_86] (rows=297 width=400) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_80] (rows=297 width=404) + Filter Operator [FIL_85] (rows=297 width=404) predicate:(i_current_price BETWEEN 22 AND 52 and (i_manufact_id) IN (678, 964, 918, 849) and i_item_sk is not null) TableScan [TS_3] (rows=462000 width=403) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_item_desc","i_current_price","i_manufact_id"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_90] + SHUFFLE [RS_95] PartitionCols:_col0 - Select Operator [SEL_89] (rows=287989836 width=4) + Select Operator [SEL_94] (rows=287989836 width=4) Output:["_col0"] - Filter Operator [FIL_88] (rows=287989836 width=4) + Filter Operator [FIL_93] (rows=287989836 width=4) predicate:(cs_item_sk is not null and 
cs_item_sk BETWEEN DynamicValue(RS_17_item_i_item_sk_min) AND DynamicValue(RS_17_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_17_item_i_item_sk_bloom_filter))) TableScan [TS_0] (rows=287989836 width=4) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_item_sk"] <-Reducer 7 [BROADCAST_EDGE] vectorized - BROADCAST [RS_87] - Group By Operator [GBY_86] (rows=1 width=12) + BROADCAST [RS_92] + Group By Operator [GBY_91] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_85] - Group By Operator [GBY_84] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_90] + Group By Operator [GBY_89] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_83] (rows=297 width=4) + Select Operator [SEL_88] (rows=297 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_81] + Please refer to the previous Select Operator [SEL_86] <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_20] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_78] (rows=1879072 width=4) - Conds:RS_93._col0=RS_96._col0(Inner),Output:["_col1"] + Merge Join Operator [MERGEJOIN_83] (rows=1879072 width=4) + Conds:RS_98._col0=RS_101._col0(Inner),Output:["_col1"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_96] + SHUFFLE [RS_101] PartitionCols:_col0 - Select Operator [SEL_95] (rows=8116 width=4) + Select Operator [SEL_100] (rows=8116 width=4) Output:["_col0"] - Filter Operator [FIL_94] (rows=8116 width=98) + Filter Operator [FIL_99] (rows=8116 width=98) predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-06-02 00:00:00' AND TIMESTAMP'2001-08-01 00:00:00' and d_date_sk is not null) TableScan [TS_9] (rows=73049 width=98) 
default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_93] + SHUFFLE [RS_98] PartitionCols:_col0 - Select Operator [SEL_92] (rows=16912800 width=8) + Select Operator [SEL_97] (rows=16912800 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_91] (rows=16912800 width=11) + Filter Operator [FIL_96] (rows=16912800 width=11) predicate:(inv_quantity_on_hand BETWEEN 100 AND 500 and inv_item_sk is not null and inv_date_sk is not null) TableScan [TS_6] (rows=37584000 width=11) default@inventory,inventory,Tbl:COMPLETE,Col:COMPLETE,Output:["inv_date_sk","inv_item_sk","inv_quantity_on_hand"] diff --git ql/src/test/results/clientpositive/perf/tez/query40.q.out ql/src/test/results/clientpositive/perf/tez/query40.q.out index 4b65c82e00..2d368840ae 100644 --- ql/src/test/results/clientpositive/perf/tez/query40.q.out +++ ql/src/test/results/clientpositive/perf/tez/query40.q.out @@ -81,94 +81,94 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_126] - Limit [LIM_125] (rows=100 width=410) + File Output Operator [FS_131] + Limit [LIM_130] (rows=100 width=410) Number of rows:100 - Select Operator [SEL_124] (rows=769995 width=410) + Select Operator [SEL_129] (rows=769995 width=410) Output:["_col0","_col1","_col2","_col3"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_123] - Group By Operator [GBY_122] (rows=769995 width=410) + SHUFFLE [RS_128] + Group By Operator [GBY_127] (rows=769995 width=410) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_30] PartitionCols:_col0, _col1 Group By Operator [GBY_29] (rows=51819042 width=410) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col2)","sum(_col3)"],keys:_col0, _col1 - Top N Key Operator [TNK_55] (rows=51819042 width=302) - keys:_col0, _col1,top n:100 - Select Operator [SEL_27] (rows=51819042 width=302) - 
Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_101] (rows=51819042 width=302) - Conds:RS_24._col1=RS_121._col0(Inner),Output:["_col4","_col7","_col9","_col10","_col12","_col14"] + Select Operator [SEL_27] (rows=51819042 width=302) + Output:["_col0","_col1","_col2","_col3"] + Top N Key Operator [TNK_58] (rows=51819042 width=302) + keys:_col14, _col12,top n:100 + Merge Join Operator [MERGEJOIN_106] (rows=51819042 width=302) + Conds:RS_24._col1=RS_126._col0(Inner),Output:["_col4","_col7","_col9","_col10","_col12","_col14"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_121] + SHUFFLE [RS_126] PartitionCols:_col0 - Select Operator [SEL_120] (rows=27 width=90) + Select Operator [SEL_125] (rows=27 width=90) Output:["_col0","_col1"] - Filter Operator [FIL_119] (rows=27 width=90) + Filter Operator [FIL_124] (rows=27 width=90) predicate:w_warehouse_sk is not null TableScan [TS_12] (rows=27 width=90) default@warehouse,warehouse,Tbl:COMPLETE,Col:COMPLETE,Output:["w_warehouse_sk","w_state"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_100] (rows=51819042 width=220) - Conds:RS_21._col2=RS_104._col0(Inner),Output:["_col1","_col4","_col7","_col9","_col10","_col12"] + Merge Join Operator [MERGEJOIN_105] (rows=51819042 width=220) + Conds:RS_21._col2=RS_109._col0(Inner),Output:["_col1","_col4","_col7","_col9","_col10","_col12"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_104] + SHUFFLE [RS_109] PartitionCols:_col0 - Select Operator [SEL_103] (rows=51333 width=104) + Select Operator [SEL_108] (rows=51333 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_102] (rows=51333 width=215) + Filter Operator [FIL_107] (rows=51333 width=215) predicate:(i_current_price BETWEEN 0.99 AND 1.49 and i_item_sk is not null) TableScan [TS_9] (rows=462000 width=215) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_current_price"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_21] 
PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_99] (rows=466374405 width=171) - Conds:RS_18._col0=RS_118._col0(Inner),Output:["_col1","_col2","_col4","_col7","_col9","_col10"] + Merge Join Operator [MERGEJOIN_104] (rows=466374405 width=171) + Conds:RS_18._col0=RS_123._col0(Inner),Output:["_col1","_col2","_col4","_col7","_col9","_col10"] <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_118] + SHUFFLE [RS_123] PartitionCols:_col0 - Select Operator [SEL_117] (rows=8116 width=12) + Select Operator [SEL_122] (rows=8116 width=12) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_116] (rows=8116 width=98) + Filter Operator [FIL_121] (rows=8116 width=98) predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00' and d_date_sk is not null) TableScan [TS_6] (rows=73049 width=98) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_98] (rows=466374405 width=167) - Conds:RS_112._col2, _col3=RS_115._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col4","_col7"] + Merge Join Operator [MERGEJOIN_103] (rows=466374405 width=167) + Conds:RS_117._col2, _col3=RS_120._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col4","_col7"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_112] + SHUFFLE [RS_117] PartitionCols:_col2, _col3 - Select Operator [SEL_111] (rows=285115816 width=127) + Select Operator [SEL_116] (rows=285115816 width=127) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_110] (rows=285115816 width=127) + Filter Operator [FIL_115] (rows=285115816 width=127) predicate:(cs_warehouse_sk is not null and cs_sold_date_sk is not null and cs_item_sk is not null and cs_item_sk BETWEEN DynamicValue(RS_22_item_i_item_sk_min) AND DynamicValue(RS_22_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_22_item_i_item_sk_bloom_filter))) TableScan 
[TS_0] (rows=287989836 width=127) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_warehouse_sk","cs_item_sk","cs_order_number","cs_sales_price"] <-Reducer 11 [BROADCAST_EDGE] vectorized - BROADCAST [RS_109] - Group By Operator [GBY_108] (rows=1 width=12) + BROADCAST [RS_114] + Group By Operator [GBY_113] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_107] - Group By Operator [GBY_106] (rows=1 width=12) + SHUFFLE [RS_112] + Group By Operator [GBY_111] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_105] (rows=51333 width=4) + Select Operator [SEL_110] (rows=51333 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_103] + Please refer to the previous Select Operator [SEL_108] <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_115] + SHUFFLE [RS_120] PartitionCols:_col0, _col1 - Select Operator [SEL_114] (rows=28798881 width=117) + Select Operator [SEL_119] (rows=28798881 width=117) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_113] (rows=28798881 width=117) + Filter Operator [FIL_118] (rows=28798881 width=117) predicate:(cr_order_number is not null and cr_item_sk is not null) TableScan [TS_3] (rows=28798881 width=117) default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number","cr_refunded_cash"] diff --git ql/src/test/results/clientpositive/perf/tez/query43.q.out ql/src/test/results/clientpositive/perf/tez/query43.q.out index eb19d41926..3205aa83e5 100644 --- ql/src/test/results/clientpositive/perf/tez/query43.q.out +++ ql/src/test/results/clientpositive/perf/tez/query43.q.out @@ -57,67 +57,69 @@ Stage-0 limit:100 Stage-1 Reducer 5 vectorized - File Output Operator [FS_74] 
- Limit [LIM_73] (rows=100 width=972) + File Output Operator [FS_80] + Limit [LIM_79] (rows=100 width=972) Number of rows:100 - Select Operator [SEL_72] (rows=3751 width=972) + Select Operator [SEL_78] (rows=3751 width=972) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_71] - Group By Operator [GBY_70] (rows=3751 width=972) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)"],keys:KEY._col0, KEY._col1 - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_18] - PartitionCols:_col0, _col1 - Group By Operator [GBY_17] (rows=2486913 width=972) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)","sum(_col5)","sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col0, _col1 - Top N Key Operator [TNK_33] (rows=525329897 width=322) - keys:_col0, _col1,top n:100 + SHUFFLE [RS_77] + Top N Key Operator [TNK_76] (rows=3751 width=972) + keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8,top n:100 + Group By Operator [GBY_75] (rows=3751 width=972) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)"],keys:KEY._col0, KEY._col1 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col0, _col1 + Group By Operator [GBY_17] (rows=2486913 width=972) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)","sum(_col5)","sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col0, _col1 Select Operator [SEL_15] (rows=525329897 width=322) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Merge Join Operator [MERGEJOIN_55] (rows=525329897 width=322) - Conds:RS_12._col1=RS_69._col0(Inner),Output:["_col2","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col12","_col13"] - <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_69] - PartitionCols:_col0 - Select Operator [SEL_68] (rows=341 width=192) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_67] (rows=341 width=303) - predicate:((s_gmt_offset = -6) and s_store_sk is not null) - TableScan [TS_6] (rows=1704 width=303) - default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id","s_store_name","s_gmt_offset"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_12] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_54] (rows=525329897 width=138) - Conds:RS_66._col0=RS_58._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] - <-Map 6 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_58] - PartitionCols:_col0 - Select Operator [SEL_57] (rows=652 width=32) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_56] (rows=652 width=99) - predicate:((d_year = 1998) and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=99) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_day_name"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_66] - PartitionCols:_col0 - Select Operator [SEL_65] (rows=525329897 width=114) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_64] (rows=525329897 width=114) - predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_0] (rows=575995635 width=114) - 
default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_sales_price"] - <-Reducer 7 [BROADCAST_EDGE] vectorized - BROADCAST [RS_63] - Group By Operator [GBY_62] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_61] - Group By Operator [GBY_60] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_59] (rows=652 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_57] + Top N Key Operator [TNK_36] (rows=525329897 width=322) + keys:_col13, _col12,top n:100 + Merge Join Operator [MERGEJOIN_60] (rows=525329897 width=322) + Conds:RS_12._col1=RS_74._col0(Inner),Output:["_col2","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col12","_col13"] + <-Map 8 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_74] + PartitionCols:_col0 + Select Operator [SEL_73] (rows=341 width=192) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_72] (rows=341 width=303) + predicate:((s_gmt_offset = -6) and s_store_sk is not null) + TableScan [TS_6] (rows=1704 width=303) + default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id","s_store_name","s_gmt_offset"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_59] (rows=525329897 width=138) + Conds:RS_71._col0=RS_63._col0(Inner),Output:["_col1","_col2","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] + <-Map 6 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_63] + PartitionCols:_col0 + Select Operator [SEL_62] (rows=652 width=32) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Filter Operator [FIL_61] (rows=652 width=99) + predicate:((d_year = 1998) and d_date_sk is not 
null) + TableScan [TS_3] (rows=73049 width=99) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_day_name"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_71] + PartitionCols:_col0 + Select Operator [SEL_70] (rows=525329897 width=114) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_69] (rows=525329897 width=114) + predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_0] (rows=575995635 width=114) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_sales_price"] + <-Reducer 7 [BROADCAST_EDGE] vectorized + BROADCAST [RS_68] + Group By Operator [GBY_67] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_66] + Group By Operator [GBY_65] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_64] (rows=652 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_62] diff --git ql/src/test/results/clientpositive/perf/tez/query45.q.out ql/src/test/results/clientpositive/perf/tez/query45.q.out index 4538a6540d..f3f646213f 100644 --- ql/src/test/results/clientpositive/perf/tez/query45.q.out +++ ql/src/test/results/clientpositive/perf/tez/query45.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[133][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[138][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product PREHOOK: query: explain 
select ca_zip, ca_county, sum(ws_sales_price) from web_sales, customer, customer_address, date_dim, item @@ -70,137 +70,137 @@ Stage-0 limit:100 Stage-1 Reducer 6 vectorized - File Output Operator [FS_171] - Limit [LIM_170] (rows=100 width=299) + File Output Operator [FS_176] + Limit [LIM_175] (rows=100 width=299) Number of rows:100 - Select Operator [SEL_169] (rows=17401956 width=299) + Select Operator [SEL_174] (rows=17401956 width=299) Output:["_col0","_col1","_col2"] <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_168] - Group By Operator [GBY_167] (rows=17401956 width=299) + SHUFFLE [RS_173] + Group By Operator [GBY_172] (rows=17401956 width=299) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_53] PartitionCols:_col0, _col1 Group By Operator [GBY_52] (rows=143930993 width=299) Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col8, _col7 - Top N Key Operator [TNK_82] (rows=143930993 width=310) - keys:_col8, _col7,top n:100 - Select Operator [SEL_51] (rows=143930993 width=310) - Output:["_col3","_col7","_col8"] + Select Operator [SEL_51] (rows=143930993 width=310) + Output:["_col3","_col7","_col8"] + Top N Key Operator [TNK_85] (rows=143930993 width=310) + keys:_col8, _col7,top n:100 Filter Operator [FIL_50] (rows=143930993 width=310) predicate:(((_col14 <> 0L) and _col16 is not null) or (substr(_col8, 1, 5)) IN ('85669', '86197', '88274', '83405', '86475', '85392', '85460', '80348', '81792')) Select Operator [SEL_49] (rows=143930993 width=310) Output:["_col3","_col7","_col8","_col14","_col16"] - Merge Join Operator [MERGEJOIN_133] (rows=143930993 width=310) + Merge Join Operator [MERGEJOIN_138] (rows=143930993 width=310) Conds:(Inner),Output:["_col3","_col4","_col8","_col12","_col16"] <-Reducer 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_166] - Group By Operator [GBY_165] (rows=1 width=8) + PARTITION_ONLY_SHUFFLE [RS_171] + Group By 
Operator [GBY_170] (rows=1 width=8) Output:["_col0"],aggregations:["count(VALUE._col0)"] <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_164] - Group By Operator [GBY_163] (rows=1 width=8) + PARTITION_ONLY_SHUFFLE [RS_169] + Group By Operator [GBY_168] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_162] (rows=11 width=4) - Filter Operator [FIL_161] (rows=11 width=4) + Select Operator [SEL_167] (rows=11 width=4) + Filter Operator [FIL_166] (rows=11 width=4) predicate:(i_item_sk) IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) TableScan [TS_33] (rows=462000 width=4) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk"] <-Reducer 3 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_46] - Merge Join Operator [MERGEJOIN_132] (rows=143930993 width=302) + Merge Join Operator [MERGEJOIN_137] (rows=143930993 width=302) Conds:RS_43._col0=RS_44._col6(Inner),Output:["_col3","_col4","_col8","_col12"] <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_44] PartitionCols:_col6 - Merge Join Operator [MERGEJOIN_131] (rows=143930993 width=119) + Merge Join Operator [MERGEJOIN_136] (rows=143930993 width=119) Conds:RS_29._col0=RS_30._col1(Inner),Output:["_col3","_col6","_col7"] <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_30] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_130] (rows=143930993 width=119) - Conds:RS_160._col0=RS_152._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_135] (rows=143930993 width=119) + Conds:RS_165._col0=RS_157._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 14 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_152] + PARTITION_ONLY_SHUFFLE [RS_157] PartitionCols:_col0 - Select Operator [SEL_151] (rows=130 width=12) + Select Operator [SEL_156] (rows=130 width=12) Output:["_col0"] - Filter Operator [FIL_150] (rows=130 width=12) + Filter Operator [FIL_155] (rows=130 width=12) predicate:((d_year = 2000) and (d_qoy = 2) and d_date_sk is not null) TableScan [TS_19] (rows=73049 
width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_160] + SHUFFLE [RS_165] PartitionCols:_col0 - Select Operator [SEL_159] (rows=143930993 width=123) + Select Operator [SEL_164] (rows=143930993 width=123) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_158] (rows=143930993 width=123) + Filter Operator [FIL_163] (rows=143930993 width=123) predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_item_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_23_date_dim_d_date_sk_min) AND DynamicValue(RS_23_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_23_date_dim_d_date_sk_bloom_filter))) TableScan [TS_16] (rows=144002668 width=123) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_customer_sk","ws_sales_price"] <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_157] - Group By Operator [GBY_156] (rows=1 width=12) + BROADCAST [RS_162] + Group By Operator [GBY_161] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_155] - Group By Operator [GBY_154] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_160] + Group By Operator [GBY_159] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_153] (rows=130 width=4) + Select Operator [SEL_158] (rows=130 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_151] + Please refer to the previous Select Operator [SEL_156] <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_29] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_129] (rows=462007 width=4) - Conds:RS_144._col1=RS_149._col0(Left 
Outer),Output:["_col0","_col3"] + Merge Join Operator [MERGEJOIN_134] (rows=462007 width=4) + Conds:RS_149._col1=RS_154._col0(Left Outer),Output:["_col0","_col3"] <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_144] + SHUFFLE [RS_149] PartitionCols:_col1 - Select Operator [SEL_142] (rows=462000 width=104) + Select Operator [SEL_147] (rows=462000 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_140] (rows=462000 width=104) + Filter Operator [FIL_145] (rows=462000 width=104) predicate:i_item_sk is not null TableScan [TS_6] (rows=462000 width=104) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] <-Reducer 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_149] + SHUFFLE [RS_154] PartitionCols:_col0 - Select Operator [SEL_148] (rows=5 width=104) + Select Operator [SEL_153] (rows=5 width=104) Output:["_col0","_col1"] - Group By Operator [GBY_147] (rows=5 width=100) + Group By Operator [GBY_152] (rows=5 width=100) Output:["_col0"],keys:KEY._col0 <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_146] + SHUFFLE [RS_151] PartitionCols:_col0 - Group By Operator [GBY_145] (rows=5 width=100) + Group By Operator [GBY_150] (rows=5 width=100) Output:["_col0"],keys:i_item_id - Select Operator [SEL_143] (rows=11 width=104) + Select Operator [SEL_148] (rows=11 width=104) Output:["i_item_id"] - Filter Operator [FIL_141] (rows=11 width=104) + Filter Operator [FIL_146] (rows=11 width=104) predicate:((i_item_sk) IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) and i_item_id is not null) Please refer to the previous TableScan [TS_6] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_43] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_128] (rows=80000000 width=191) - Conds:RS_136._col1=RS_139._col0(Inner),Output:["_col0","_col3","_col4"] + Merge Join Operator [MERGEJOIN_133] (rows=80000000 width=191) + Conds:RS_141._col1=RS_144._col0(Inner),Output:["_col0","_col3","_col4"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_136] + SHUFFLE [RS_141] PartitionCols:_col1 - Select Operator 
[SEL_135] (rows=80000000 width=8) + Select Operator [SEL_140] (rows=80000000 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_134] (rows=80000000 width=8) + Filter Operator [FIL_139] (rows=80000000 width=8) predicate:(c_customer_sk is not null and c_current_addr_sk is not null) TableScan [TS_0] (rows=80000000 width=8) default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_addr_sk"] <-Map 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_139] + SHUFFLE [RS_144] PartitionCols:_col0 - Select Operator [SEL_138] (rows=40000000 width=191) + Select Operator [SEL_143] (rows=40000000 width=191) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_137] (rows=40000000 width=191) + Filter Operator [FIL_142] (rows=40000000 width=191) predicate:ca_address_sk is not null TableScan [TS_3] (rows=40000000 width=191) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county","ca_zip"] diff --git ql/src/test/results/clientpositive/perf/tez/query49.q.out ql/src/test/results/clientpositive/perf/tez/query49.q.out index 9c34eccceb..239592bc1d 100644 --- ql/src/test/results/clientpositive/perf/tez/query49.q.out +++ ql/src/test/results/clientpositive/perf/tez/query49.q.out @@ -299,251 +299,251 @@ Stage-0 limit:100 Stage-1 Reducer 11 vectorized - File Output Operator [FS_307] - Limit [LIM_306] (rows=100 width=215) + File Output Operator [FS_312] + Limit [LIM_311] (rows=100 width=215) Number of rows:100 - Select Operator [SEL_305] (rows=40436 width=215) + Select Operator [SEL_310] (rows=40436 width=215) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_304] - Select Operator [SEL_303] (rows=40436 width=215) + SHUFFLE [RS_309] + Select Operator [SEL_308] (rows=40436 width=215) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_302] (rows=40436 width=215) + Group By Operator [GBY_307] (rows=40436 width=215) 
Output:["_col0","_col1","_col2","_col3","_col4"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 <-Union 9 [SIMPLE_EDGE] <-Reducer 24 [CONTAINS] vectorized - Reduce Output Operator [RS_348] + Reduce Output Operator [RS_353] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_347] (rows=40436 width=215) + Group By Operator [GBY_352] (rows=40436 width=215) Output:["_col0","_col1","_col2","_col3","_col4"],keys:_col0, _col3, _col4, _col1, _col2 - Top N Key Operator [TNK_346] (rows=40436 width=214) + Top N Key Operator [TNK_351] (rows=40436 width=214) keys:_col0, _col3, _col4, _col1, _col2,top n:100 - Select Operator [SEL_345] (rows=14232 width=213) + Select Operator [SEL_350] (rows=14232 width=213) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_344] (rows=14232 width=248) + Filter Operator [FIL_349] (rows=14232 width=248) predicate:((_col0 <= 10) or (rank_window_1 <= 10)) - PTF Operator [PTF_343] (rows=21349 width=248) + PTF Operator [PTF_348] (rows=21349 width=248) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col4 AS decimal(15,4)) / CAST( _col5 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_342] (rows=21349 width=248) + Select Operator [SEL_347] (rows=21349 width=248) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 23 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_341] + SHUFFLE [RS_346] PartitionCols:0 - Select Operator [SEL_340] (rows=21349 width=244) + Select Operator [SEL_345] (rows=21349 width=244) Output:["rank_window_0","_col0","_col1","_col2","_col3","_col4"] - PTF Operator [PTF_339] (rows=21349 width=244) + PTF Operator [PTF_344] (rows=21349 width=244) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col1 AS decimal(15,4)) / CAST( _col2 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_338] (rows=21349 width=244) + Select Operator [SEL_343] (rows=21349 width=244) 
Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_337] + SHUFFLE [RS_342] PartitionCols:0 - Group By Operator [GBY_336] (rows=21349 width=244) + Group By Operator [GBY_341] (rows=21349 width=244) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0 <-Reducer 21 [SIMPLE_EDGE] SHUFFLE [RS_86] PartitionCols:_col0 Group By Operator [GBY_85] (rows=426980 width=244) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col8)","sum(_col3)","sum(_col9)","sum(_col4)"],keys:_col1 - Merge Join Operator [MERGEJOIN_234] (rows=20856667 width=236) - Conds:RS_81._col1, _col2=RS_335._col0, _col1(Inner),Output:["_col1","_col3","_col4","_col8","_col9"] + Merge Join Operator [MERGEJOIN_239] (rows=20856667 width=236) + Conds:RS_81._col1, _col2=RS_340._col0, _col1(Inner),Output:["_col1","_col3","_col4","_col8","_col9"] <-Map 30 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_335] + SHUFFLE [RS_340] PartitionCols:_col0, _col1 - Select Operator [SEL_334] (rows=19197050 width=124) + Select Operator [SEL_339] (rows=19197050 width=124) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_333] (rows=19197050 width=119) + Filter Operator [FIL_338] (rows=19197050 width=119) predicate:((sr_return_amt > 10000) and sr_ticket_number is not null and sr_item_sk is not null) TableScan [TS_75] (rows=57591150 width=119) default@store_returns,sr,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_ticket_number","sr_return_quantity","sr_return_amt"] <-Reducer 20 [SIMPLE_EDGE] SHUFFLE [RS_81] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_233] (rows=61119617 width=124) - Conds:RS_332._col0=RS_269._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_238] (rows=61119617 width=124) + Conds:RS_337._col0=RS_274._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 12 [SIMPLE_EDGE] vectorized - 
PARTITION_ONLY_SHUFFLE [RS_269] + PARTITION_ONLY_SHUFFLE [RS_274] PartitionCols:_col0 - Select Operator [SEL_264] (rows=50 width=4) + Select Operator [SEL_269] (rows=50 width=4) Output:["_col0"] - Filter Operator [FIL_263] (rows=50 width=12) + Filter Operator [FIL_268] (rows=50 width=12) predicate:((d_year = 2000) and (d_moy = 12) and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] <-Map 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_332] + SHUFFLE [RS_337] PartitionCols:_col0 - Select Operator [SEL_331] (rows=61119617 width=127) + Select Operator [SEL_336] (rows=61119617 width=127) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_330] (rows=61119617 width=229) + Filter Operator [FIL_335] (rows=61119617 width=229) predicate:((ss_net_profit > 1) and (ss_net_paid > 0) and (ss_quantity > 0) and ss_sold_date_sk is not null and ss_ticket_number is not null and ss_item_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_79_date_dim_d_date_sk_min) AND DynamicValue(RS_79_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_79_date_dim_d_date_sk_bloom_filter))) TableScan [TS_69] (rows=575995635 width=229) default@store_sales,sts,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_ticket_number","ss_quantity","ss_net_paid","ss_net_profit"] <-Reducer 25 [BROADCAST_EDGE] vectorized - BROADCAST [RS_329] - Group By Operator [GBY_328] (rows=1 width=12) + BROADCAST [RS_334] + Group By Operator [GBY_333] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_276] - Group By Operator [GBY_273] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_281] + Group By Operator [GBY_278] (rows=1 width=12) 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_270] (rows=50 width=4) + Select Operator [SEL_275] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_264] + Please refer to the previous Select Operator [SEL_269] <-Reducer 8 [CONTAINS] vectorized - Reduce Output Operator [RS_301] + Reduce Output Operator [RS_306] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_300] (rows=40436 width=215) + Group By Operator [GBY_305] (rows=40436 width=215) Output:["_col0","_col1","_col2","_col3","_col4"],keys:_col0, _col3, _col4, _col1, _col2 - Top N Key Operator [TNK_299] (rows=40436 width=214) + Top N Key Operator [TNK_304] (rows=40436 width=214) keys:_col0, _col3, _col4, _col1, _col2,top n:100 - Select Operator [SEL_298] (rows=26204 width=215) + Select Operator [SEL_303] (rows=26204 width=215) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_297] (rows=26204 width=215) + Group By Operator [GBY_302] (rows=26204 width=215) Output:["_col0","_col1","_col2","_col3","_col4"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 <-Union 7 [SIMPLE_EDGE] <-Reducer 18 [CONTAINS] vectorized - Reduce Output Operator [RS_327] + Reduce Output Operator [RS_332] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_326] (rows=26204 width=215) + Group By Operator [GBY_331] (rows=26204 width=215) Output:["_col0","_col1","_col2","_col3","_col4"],keys:_col0, _col3, _col4, _col1, _col2 - Select Operator [SEL_325] (rows=12574 width=215) + Select Operator [SEL_330] (rows=12574 width=215) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_324] (rows=12574 width=248) + Filter Operator [FIL_329] (rows=12574 width=248) predicate:((_col0 <= 10) or (rank_window_1 <= 10)) - PTF Operator [PTF_323] (rows=18863 width=248) + PTF Operator [PTF_328] (rows=18863 width=248) Function 
definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col4 AS decimal(15,4)) / CAST( _col5 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_322] (rows=18863 width=248) + Select Operator [SEL_327] (rows=18863 width=248) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_321] + SHUFFLE [RS_326] PartitionCols:0 - Select Operator [SEL_320] (rows=18863 width=244) + Select Operator [SEL_325] (rows=18863 width=244) Output:["rank_window_0","_col0","_col1","_col2","_col3","_col4"] - PTF Operator [PTF_319] (rows=18863 width=244) + PTF Operator [PTF_324] (rows=18863 width=244) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col1 AS decimal(15,4)) / CAST( _col2 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_318] (rows=18863 width=244) + Select Operator [SEL_323] (rows=18863 width=244) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_317] + SHUFFLE [RS_322] PartitionCols:0 - Group By Operator [GBY_316] (rows=18863 width=244) + Group By Operator [GBY_321] (rows=18863 width=244) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0 <-Reducer 15 [SIMPLE_EDGE] SHUFFLE [RS_48] PartitionCols:_col0 Group By Operator [GBY_47] (rows=169767 width=244) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col8)","sum(_col3)","sum(_col9)","sum(_col4)"],keys:_col1 - Merge Join Operator [MERGEJOIN_232] (rows=9599627 width=236) - Conds:RS_43._col1, _col2=RS_315._col0, _col1(Inner),Output:["_col1","_col3","_col4","_col8","_col9"] + Merge Join Operator [MERGEJOIN_237] (rows=9599627 width=236) + Conds:RS_43._col1, _col2=RS_320._col0, _col1(Inner),Output:["_col1","_col3","_col4","_col8","_col9"] <-Map 28 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_315] + SHUFFLE 
[RS_320] PartitionCols:_col0, _col1 - Select Operator [SEL_314] (rows=9599627 width=124) + Select Operator [SEL_319] (rows=9599627 width=124) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_313] (rows=9599627 width=121) + Filter Operator [FIL_318] (rows=9599627 width=121) predicate:((cr_return_amount > 10000) and cr_order_number is not null and cr_item_sk is not null) TableScan [TS_37] (rows=28798881 width=121) default@catalog_returns,cr,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number","cr_return_quantity","cr_return_amount"] <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_43] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_231] (rows=31838858 width=124) - Conds:RS_312._col0=RS_267._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_236] (rows=31838858 width=124) + Conds:RS_317._col0=RS_272._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 12 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_267] + PARTITION_ONLY_SHUFFLE [RS_272] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_264] + Please refer to the previous Select Operator [SEL_269] <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_312] + SHUFFLE [RS_317] PartitionCols:_col0 - Select Operator [SEL_311] (rows=31838858 width=127) + Select Operator [SEL_316] (rows=31838858 width=127) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_310] (rows=31838858 width=239) + Filter Operator [FIL_315] (rows=31838858 width=239) predicate:((cs_net_profit > 1) and (cs_net_paid > 0) and (cs_quantity > 0) and cs_sold_date_sk is not null and cs_order_number is not null and cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_41_date_dim_d_date_sk_min) AND DynamicValue(RS_41_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_41_date_dim_d_date_sk_bloom_filter))) TableScan [TS_31] (rows=287989836 width=239) 
default@catalog_sales,cs,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_item_sk","cs_order_number","cs_quantity","cs_net_paid","cs_net_profit"] <-Reducer 19 [BROADCAST_EDGE] vectorized - BROADCAST [RS_309] - Group By Operator [GBY_308] (rows=1 width=12) + BROADCAST [RS_314] + Group By Operator [GBY_313] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_275] - Group By Operator [GBY_272] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_280] + Group By Operator [GBY_277] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_268] (rows=50 width=4) + Select Operator [SEL_273] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_264] + Please refer to the previous Select Operator [SEL_269] <-Reducer 6 [CONTAINS] vectorized - Reduce Output Operator [RS_296] + Reduce Output Operator [RS_301] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_295] (rows=26204 width=215) + Group By Operator [GBY_300] (rows=26204 width=215) Output:["_col0","_col1","_col2","_col3","_col4"],keys:_col0, _col3, _col4, _col1, _col2 - Select Operator [SEL_294] (rows=13630 width=211) + Select Operator [SEL_299] (rows=13630 width=211) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_293] (rows=13630 width=248) + Filter Operator [FIL_298] (rows=13630 width=248) predicate:((_col0 <= 10) or (rank_window_1 <= 10)) - PTF Operator [PTF_292] (rows=20445 width=248) + PTF Operator [PTF_297] (rows=20445 width=248) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col4 AS decimal(15,4)) / CAST( _col5 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_291] (rows=20445 width=248) + Select 
Operator [SEL_296] (rows=20445 width=248) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_290] + SHUFFLE [RS_295] PartitionCols:0 - Select Operator [SEL_289] (rows=20445 width=244) + Select Operator [SEL_294] (rows=20445 width=244) Output:["rank_window_0","_col0","_col1","_col2","_col3","_col4"] - PTF Operator [PTF_288] (rows=20445 width=244) + PTF Operator [PTF_293] (rows=20445 width=244) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"(CAST( _col1 AS decimal(15,4)) / CAST( _col2 AS decimal(15,4))) ASC NULLS LAST","partition by:":"0"}] - Select Operator [SEL_287] (rows=20445 width=244) + Select Operator [SEL_292] (rows=20445 width=244) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_286] + SHUFFLE [RS_291] PartitionCols:0 - Group By Operator [GBY_285] (rows=20445 width=244) + Group By Operator [GBY_290] (rows=20445 width=244) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_17] PartitionCols:_col0 Group By Operator [GBY_16] (rows=102225 width=244) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col8)","sum(_col3)","sum(_col9)","sum(_col4)"],keys:_col1 - Merge Join Operator [MERGEJOIN_230] (rows=5227456 width=236) - Conds:RS_12._col1, _col2=RS_284._col0, _col1(Inner),Output:["_col1","_col3","_col4","_col8","_col9"] + Merge Join Operator [MERGEJOIN_235] (rows=5227456 width=236) + Conds:RS_12._col1, _col2=RS_289._col0, _col1(Inner),Output:["_col1","_col3","_col4","_col8","_col9"] <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_284] + SHUFFLE [RS_289] PartitionCols:_col0, _col1 - Select Operator [SEL_283] (rows=4799489 width=124) + Select Operator [SEL_288] (rows=4799489 width=124) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_282] (rows=4799489 
width=118) + Filter Operator [FIL_287] (rows=4799489 width=118) predicate:((wr_return_amt > 10000) and wr_order_number is not null and wr_item_sk is not null) TableScan [TS_6] (rows=14398467 width=118) default@web_returns,wr,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_item_sk","wr_order_number","wr_return_quantity","wr_return_amt"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_12] PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_229] (rows=15996318 width=124) - Conds:RS_281._col0=RS_265._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_234] (rows=15996318 width=124) + Conds:RS_286._col0=RS_270._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 12 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_265] + PARTITION_ONLY_SHUFFLE [RS_270] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_264] + Please refer to the previous Select Operator [SEL_269] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_281] + SHUFFLE [RS_286] PartitionCols:_col0 - Select Operator [SEL_280] (rows=15996318 width=127) + Select Operator [SEL_285] (rows=15996318 width=127) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_279] (rows=15996318 width=239) + Filter Operator [FIL_284] (rows=15996318 width=239) predicate:((ws_net_profit > 1) and (ws_net_paid > 0) and (ws_quantity > 0) and ws_sold_date_sk is not null and ws_order_number is not null and ws_item_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) TableScan [TS_0] (rows=144002668 width=239) default@web_sales,ws,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_order_number","ws_quantity","ws_net_paid","ws_net_profit"] <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_278] - Group By Operator [GBY_277] (rows=1 width=12) + BROADCAST [RS_283] + Group By Operator 
[GBY_282] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_274] - Group By Operator [GBY_271] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_279] + Group By Operator [GBY_276] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_266] (rows=50 width=4) + Select Operator [SEL_271] (rows=50 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_264] + Please refer to the previous Select Operator [SEL_269] diff --git ql/src/test/results/clientpositive/perf/tez/query5.q.out ql/src/test/results/clientpositive/perf/tez/query5.q.out index 38fba27a8e..03980ac2c0 100644 --- ql/src/test/results/clientpositive/perf/tez/query5.q.out +++ ql/src/test/results/clientpositive/perf/tez/query5.q.out @@ -303,235 +303,235 @@ Stage-0 limit:100 Stage-1 Reducer 8 vectorized - File Output Operator [FS_304] - Limit [LIM_303] (rows=100 width=619) + File Output Operator [FS_306] + Limit [LIM_305] (rows=100 width=619) Number of rows:100 - Select Operator [SEL_302] (rows=59581 width=619) + Select Operator [SEL_304] (rows=59581 width=619) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_301] - Select Operator [SEL_300] (rows=59581 width=619) + SHUFFLE [RS_303] + Select Operator [SEL_302] (rows=59581 width=619) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_299] (rows=59581 width=627) + Group By Operator [GBY_301] (rows=59581 width=627) Output:["_col0","_col1","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Union 6 [SIMPLE_EDGE] <-Reducer 14 [CONTAINS] vectorized - Reduce Output Operator [RS_315] + Reduce Output Operator [RS_317] 
PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_314] (rows=59581 width=627) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Top N Key Operator [TNK_313] (rows=39721 width=618) - keys:_col0, _col1, 0L,top n:100 - Select Operator [SEL_312] (rows=38846 width=619) + Top N Key Operator [TNK_316] (rows=59581 width=627) + keys:_col0, _col1,top n:100 + Group By Operator [GBY_315] (rows=59581 width=627) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L + Select Operator [SEL_314] (rows=38846 width=619) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_311] (rows=38846 width=548) + Group By Operator [GBY_313] (rows=38846 width=548) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0 <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_47] PartitionCols:_col0 Group By Operator [GBY_46] (rows=26026820 width=548) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col2)","sum(_col4)","sum(_col3)","sum(_col5)"],keys:_col8 - Merge Join Operator [MERGEJOIN_222] (rows=313339499 width=546) - Conds:RS_42._col0=RS_310._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8"] + Merge Join Operator [MERGEJOIN_224] (rows=313339499 width=546) + Conds:RS_42._col0=RS_312._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8"] <-Map 24 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_310] + SHUFFLE [RS_312] PartitionCols:_col0 - Select Operator [SEL_309] (rows=46000 width=104) + Select Operator [SEL_311] (rows=46000 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_308] (rows=46000 width=104) + Filter Operator [FIL_310] (rows=46000 width=104) predicate:cp_catalog_page_sk is not null TableScan [TS_36] (rows=46000 width=104) 
default@catalog_page,catalog_page,Tbl:COMPLETE,Col:COMPLETE,Output:["cp_catalog_page_sk","cp_catalog_page_id"] <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_42] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_221] (rows=313339499 width=450) - Conds:Union 22._col1=RS_276._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_223] (rows=313339499 width=450) + Conds:Union 22._col1=RS_278._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_276] + SHUFFLE [RS_278] PartitionCols:_col0 - Select Operator [SEL_273] (rows=8116 width=4) + Select Operator [SEL_275] (rows=8116 width=4) Output:["_col0"] - Filter Operator [FIL_272] (rows=8116 width=98) + Filter Operator [FIL_274] (rows=8116 width=98) predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-08-04 00:00:00' AND TIMESTAMP'1998-08-18 00:00:00' and d_date_sk is not null) TableScan [TS_8] (rows=73049 width=98) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] <-Union 22 [SIMPLE_EDGE] <-Map 21 [CONTAINS] vectorized - Reduce Output Operator [RS_328] + Reduce Output Operator [RS_330] PartitionCols:_col1 - Select Operator [SEL_327] (rows=285117694 width=455) + Select Operator [SEL_329] (rows=285117694 width=455) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_326] (rows=285117694 width=231) + Filter Operator [FIL_328] (rows=285117694 width=231) predicate:(cs_sold_date_sk is not null and cs_catalog_page_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_40_date_dim_d_date_sk_min) AND DynamicValue(RS_40_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_40_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_253] (rows=287989836 width=231) + TableScan [TS_255] (rows=287989836 width=231) Output:["cs_sold_date_sk","cs_catalog_page_sk","cs_ext_sales_price","cs_net_profit"] <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_325] - 
Group By Operator [GBY_324] (rows=1 width=12) + BROADCAST [RS_327] + Group By Operator [GBY_326] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_284] - Group By Operator [GBY_281] (rows=1 width=12) + SHUFFLE [RS_286] + Group By Operator [GBY_283] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_277] (rows=8116 width=4) + Select Operator [SEL_279] (rows=8116 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_273] + Please refer to the previous Select Operator [SEL_275] <-Map 23 [CONTAINS] vectorized - Reduce Output Operator [RS_331] + Reduce Output Operator [RS_333] PartitionCols:_col1 - Select Operator [SEL_330] (rows=28221805 width=451) + Select Operator [SEL_332] (rows=28221805 width=451) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_329] (rows=28221805 width=227) + Filter Operator [FIL_331] (rows=28221805 width=227) predicate:(cr_catalog_page_sk is not null and cr_returned_date_sk is not null) - TableScan [TS_258] (rows=28798881 width=227) + TableScan [TS_260] (rows=28798881 width=227) Output:["cr_returned_date_sk","cr_catalog_page_sk","cr_return_amount","cr_net_loss"] <-Reducer 18 [CONTAINS] vectorized - Reduce Output Operator [RS_323] + Reduce Output Operator [RS_325] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_322] (rows=59581 width=627) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Top N Key Operator [TNK_321] (rows=39721 width=618) - keys:_col0, _col1, 0L,top n:100 - Select Operator [SEL_320] (rows=53 width=615) + Top N Key Operator [TNK_324] (rows=59581 width=627) + keys:_col0, _col1,top n:100 + Group By Operator 
[GBY_323] (rows=59581 width=627) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L + Select Operator [SEL_322] (rows=53 width=615) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_319] (rows=53 width=548) + Group By Operator [GBY_321] (rows=53 width=548) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0 <-Reducer 17 [SIMPLE_EDGE] SHUFFLE [RS_80] PartitionCols:_col0 Group By Operator [GBY_79] (rows=31641 width=548) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col2)","sum(_col4)","sum(_col3)","sum(_col5)"],keys:_col8 - Merge Join Operator [MERGEJOIN_224] (rows=278713608 width=547) - Conds:RS_75._col0=RS_318._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8"] + Merge Join Operator [MERGEJOIN_226] (rows=278713608 width=547) + Conds:RS_75._col0=RS_320._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8"] <-Map 30 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_318] + SHUFFLE [RS_320] PartitionCols:_col0 - Select Operator [SEL_317] (rows=84 width=104) + Select Operator [SEL_319] (rows=84 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_316] (rows=84 width=104) + Filter Operator [FIL_318] (rows=84 width=104) predicate:web_site_sk is not null TableScan [TS_69] (rows=84 width=104) default@web_site,web_site,Tbl:COMPLETE,Col:COMPLETE,Output:["web_site_sk","web_site_id"] <-Reducer 16 [SIMPLE_EDGE] SHUFFLE [RS_75] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_223] (rows=278713608 width=451) - Conds:Union 26._col1=RS_278._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_225] (rows=278713608 width=451) + Conds:Union 26._col1=RS_280._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_278] + SHUFFLE [RS_280] 
PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_273] + Please refer to the previous Select Operator [SEL_275] <-Union 26 [SIMPLE_EDGE] <-Map 25 [CONTAINS] vectorized - Reduce Output Operator [RS_336] + Reduce Output Operator [RS_338] PartitionCols:_col1 - Select Operator [SEL_335] (rows=143930874 width=455) + Select Operator [SEL_337] (rows=143930874 width=455) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_334] (rows=143930874 width=231) + Filter Operator [FIL_336] (rows=143930874 width=231) predicate:(ws_web_site_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_73_date_dim_d_date_sk_min) AND DynamicValue(RS_73_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_73_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_263] (rows=144002668 width=231) + TableScan [TS_265] (rows=144002668 width=231) Output:["ws_sold_date_sk","ws_web_site_sk","ws_ext_sales_price","ws_net_profit"] <-Reducer 19 [BROADCAST_EDGE] vectorized - BROADCAST [RS_333] - Group By Operator [GBY_332] (rows=1 width=12) + BROADCAST [RS_335] + Group By Operator [GBY_334] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_285] - Group By Operator [GBY_282] (rows=1 width=12) + SHUFFLE [RS_287] + Group By Operator [GBY_284] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_279] (rows=8116 width=4) + Select Operator [SEL_281] (rows=8116 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_273] + Please refer to the previous Select Operator [SEL_275] <-Reducer 28 [CONTAINS] - Reduce Output Operator [RS_271] + Reduce Output Operator [RS_273] PartitionCols:_col1 - Select Operator [SEL_269] 
(rows=134782734 width=454) + Select Operator [SEL_271] (rows=134782734 width=454) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_268] (rows=134782734 width=230) - Conds:RS_339._col0, _col2=RS_342._col1, _col2(Inner),Output:["_col1","_col3","_col6","_col7"] + Merge Join Operator [MERGEJOIN_270] (rows=134782734 width=230) + Conds:RS_341._col0, _col2=RS_344._col1, _col2(Inner),Output:["_col1","_col3","_col6","_col7"] <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_339] + SHUFFLE [RS_341] PartitionCols:_col0, _col2 - Select Operator [SEL_338] (rows=143966669 width=11) + Select Operator [SEL_340] (rows=143966669 width=11) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_337] (rows=143966669 width=11) + Filter Operator [FIL_339] (rows=143966669 width=11) predicate:(ws_web_site_sk is not null and ws_item_sk is not null and ws_order_number is not null) TableScan [TS_54] (rows=144002668 width=11) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_item_sk","ws_web_site_sk","ws_order_number"] <-Map 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_342] + SHUFFLE [RS_344] PartitionCols:_col1, _col2 - Select Operator [SEL_341] (rows=13749816 width=225) + Select Operator [SEL_343] (rows=13749816 width=225) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_340] (rows=13749816 width=225) + Filter Operator [FIL_342] (rows=13749816 width=225) predicate:(wr_returned_date_sk is not null and wr_item_sk is not null and wr_order_number is not null) TableScan [TS_57] (rows=14398467 width=225) default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_returned_date_sk","wr_item_sk","wr_order_number","wr_return_amt","wr_net_loss"] <-Reducer 5 [CONTAINS] vectorized - Reduce Output Operator [RS_298] + Reduce Output Operator [RS_300] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_297] (rows=59581 width=627) - 
Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Top N Key Operator [TNK_296] (rows=39721 width=618) - keys:_col0, _col1, 0L,top n:100 - Select Operator [SEL_295] (rows=822 width=617) + Top N Key Operator [TNK_299] (rows=59581 width=627) + keys:_col0, _col1,top n:100 + Group By Operator [GBY_298] (rows=59581 width=627) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L + Select Operator [SEL_297] (rows=822 width=617) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_294] (rows=822 width=548) + Group By Operator [GBY_296] (rows=822 width=548) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_22] PartitionCols:_col0 Group By Operator [GBY_21] (rows=983934 width=548) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col2)","sum(_col4)","sum(_col3)","sum(_col5)"],keys:_col8 - Merge Join Operator [MERGEJOIN_220] (rows=578964757 width=528) - Conds:RS_17._col0=RS_293._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8"] + Merge Join Operator [MERGEJOIN_222] (rows=578964757 width=528) + Conds:RS_17._col0=RS_295._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8"] <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_293] + SHUFFLE [RS_295] PartitionCols:_col0 - Select Operator [SEL_292] (rows=1704 width=104) + Select Operator [SEL_294] (rows=1704 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_291] (rows=1704 width=104) + Filter Operator [FIL_293] (rows=1704 width=104) predicate:s_store_sk is not null TableScan [TS_11] (rows=1704 width=104) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_17] PartitionCols:_col0 - Merge Join Operator 
[MERGEJOIN_219] (rows=578964757 width=432) - Conds:Union 2._col1=RS_274._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_221] (rows=578964757 width=432) + Conds:Union 2._col1=RS_276._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_274] + SHUFFLE [RS_276] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_273] + Please refer to the previous Select Operator [SEL_275] <-Union 2 [SIMPLE_EDGE] <-Map 1 [CONTAINS] vectorized - Reduce Output Operator [RS_290] + Reduce Output Operator [RS_292] PartitionCols:_col1 - Select Operator [SEL_289] (rows=525329897 width=445) + Select Operator [SEL_291] (rows=525329897 width=445) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_288] (rows=525329897 width=221) + Filter Operator [FIL_290] (rows=525329897 width=221) predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_15_date_dim_d_date_sk_min) AND DynamicValue(RS_15_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_15_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_225] (rows=575995635 width=221) + TableScan [TS_227] (rows=575995635 width=221) Output:["ss_sold_date_sk","ss_store_sk","ss_ext_sales_price","ss_net_profit"] <-Reducer 11 [BROADCAST_EDGE] vectorized - BROADCAST [RS_287] - Group By Operator [GBY_286] (rows=1 width=12) + BROADCAST [RS_289] + Group By Operator [GBY_288] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 10 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_283] - Group By Operator [GBY_280] (rows=1 width=12) + SHUFFLE [RS_285] + Group By Operator [GBY_282] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator 
[SEL_275] (rows=8116 width=4) + Select Operator [SEL_277] (rows=8116 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_273] + Please refer to the previous Select Operator [SEL_275] <-Map 9 [CONTAINS] vectorized - Reduce Output Operator [RS_307] + Reduce Output Operator [RS_309] PartitionCols:_col1 - Select Operator [SEL_306] (rows=53634860 width=447) + Select Operator [SEL_308] (rows=53634860 width=447) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_305] (rows=53634860 width=223) + Filter Operator [FIL_307] (rows=53634860 width=223) predicate:(sr_store_sk is not null and sr_returned_date_sk is not null) - TableScan [TS_236] (rows=57591150 width=223) + TableScan [TS_238] (rows=57591150 width=223) Output:["sr_returned_date_sk","sr_store_sk","sr_return_amt","sr_net_loss"] diff --git ql/src/test/results/clientpositive/perf/tez/query50.q.out ql/src/test/results/clientpositive/perf/tez/query50.q.out index 6e34831de6..3d9cbbde63 100644 --- ql/src/test/results/clientpositive/perf/tez/query50.q.out +++ ql/src/test/results/clientpositive/perf/tez/query50.q.out @@ -139,83 +139,83 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_140] - Limit [LIM_139] (rows=100 width=858) + File Output Operator [FS_145] + Limit [LIM_144] (rows=100 width=858) Number of rows:100 - Select Operator [SEL_138] (rows=478292911 width=857) + Select Operator [SEL_143] (rows=478292911 width=857) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_137] - Group By Operator [GBY_136] (rows=478292911 width=857) + SHUFFLE [RS_142] + Group By Operator [GBY_141] (rows=478292911 width=857) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8, KEY._col9 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_30] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Group By Operator [GBY_29] (rows=478292911 width=857) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["sum(_col10)","sum(_col11)","sum(_col12)","sum(_col13)","sum(_col14)"],keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Top N Key Operator [TNK_56] (rows=478292911 width=825) - keys:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9,top n:100 - Select Operator [SEL_27] (rows=478292911 width=825) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] - Merge Join Operator [MERGEJOIN_120] (rows=478292911 width=825) - Conds:RS_24._col8=RS_135._col0(Inner),Output:["_col0","_col5","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21"] + Select Operator [SEL_27] (rows=478292911 width=825) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] + Top N Key Operator [TNK_59] (rows=478292911 width=825) + keys:_col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21,top n:100 + Merge Join Operator [MERGEJOIN_125] (rows=478292911 width=825) + Conds:RS_24._col8=RS_140._col0(Inner),Output:["_col0","_col5","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21"] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_135] 
+ SHUFFLE [RS_140] PartitionCols:_col0 - Select Operator [SEL_134] (rows=1704 width=821) + Select Operator [SEL_139] (rows=1704 width=821) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] - Filter Operator [FIL_133] (rows=1704 width=821) + Filter Operator [FIL_138] (rows=1704 width=821) predicate:s_store_sk is not null TableScan [TS_12] (rows=1704 width=821) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_company_id","s_street_number","s_street_name","s_street_type","s_suite_number","s_city","s_county","s_state","s_zip"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col8 - Merge Join Operator [MERGEJOIN_119] (rows=478292911 width=11) - Conds:RS_21._col5=RS_132._col0(Inner),Output:["_col0","_col5","_col8"] + Merge Join Operator [MERGEJOIN_124] (rows=478292911 width=11) + Conds:RS_21._col5=RS_137._col0(Inner),Output:["_col0","_col5","_col8"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_132] + SHUFFLE [RS_137] PartitionCols:_col0 - Select Operator [SEL_131] (rows=73049 width=4) + Select Operator [SEL_136] (rows=73049 width=4) Output:["_col0"] - Filter Operator [FIL_130] (rows=73049 width=4) + Filter Operator [FIL_135] (rows=73049 width=4) predicate:d_date_sk is not null TableScan [TS_9] (rows=73049 width=4) default@date_dim,d1,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col5 - Merge Join Operator [MERGEJOIN_118] (rows=478292911 width=11) - Conds:RS_18._col1, _col2, _col3=RS_129._col1, _col2, _col4(Inner),Output:["_col0","_col5","_col8"] + Merge Join Operator [MERGEJOIN_123] (rows=478292911 width=11) + Conds:RS_18._col1, _col2, _col3=RS_134._col1, _col2, _col4(Inner),Output:["_col0","_col5","_col8"] <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_129] + SHUFFLE [RS_134] PartitionCols:_col1, _col2, _col4 - Select Operator [SEL_128] (rows=501694138 width=19) + Select Operator [SEL_133] (rows=501694138 width=19) 
Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_127] (rows=501694138 width=19) + Filter Operator [FIL_132] (rows=501694138 width=19) predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and ss_store_sk is not null and ss_ticket_number is not null and ss_item_sk is not null) TableScan [TS_6] (rows=575995635 width=19) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col1, _col2, _col3 - Merge Join Operator [MERGEJOIN_117] (rows=53632139 width=15) - Conds:RS_123._col0=RS_126._col0(Inner),Output:["_col0","_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_122] (rows=53632139 width=15) + Conds:RS_128._col0=RS_131._col0(Inner),Output:["_col0","_col1","_col2","_col3"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_123] + SHUFFLE [RS_128] PartitionCols:_col0 - Select Operator [SEL_122] (rows=53632139 width=15) + Select Operator [SEL_127] (rows=53632139 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_121] (rows=53632139 width=15) + Filter Operator [FIL_126] (rows=53632139 width=15) predicate:(sr_customer_sk is not null and sr_returned_date_sk is not null and sr_ticket_number is not null and sr_item_sk is not null) TableScan [TS_0] (rows=57591150 width=15) default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number"] <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_126] + SHUFFLE [RS_131] PartitionCols:_col0 - Select Operator [SEL_125] (rows=50 width=4) + Select Operator [SEL_130] (rows=50 width=4) Output:["_col0"] - Filter Operator [FIL_124] (rows=50 width=12) + Filter Operator [FIL_129] (rows=50 width=12) predicate:((d_year = 2000) and (d_moy = 9) and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=12) 
default@date_dim,d2,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] diff --git ql/src/test/results/clientpositive/perf/tez/query60.q.out ql/src/test/results/clientpositive/perf/tez/query60.q.out index e77c89ba69..8bc53138d3 100644 --- ql/src/test/results/clientpositive/perf/tez/query60.q.out +++ ql/src/test/results/clientpositive/perf/tez/query60.q.out @@ -197,230 +197,232 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_362] - Limit [LIM_361] (rows=100 width=212) + File Output Operator [FS_368] + Limit [LIM_367] (rows=100 width=212) Number of rows:100 - Select Operator [SEL_360] (rows=1717 width=212) + Select Operator [SEL_366] (rows=1717 width=212) Output:["_col0","_col1"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_359] - Group By Operator [GBY_358] (rows=1717 width=212) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Union 5 [SIMPLE_EDGE] - <-Reducer 11 [CONTAINS] vectorized - Reduce Output Operator [RS_380] - PartitionCols:_col0 - Group By Operator [GBY_379] (rows=1717 width=212) - Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Top N Key Operator [TNK_378] (rows=5151 width=212) - keys:_col0,top n:100 - Group By Operator [GBY_377] (rows=1717 width=212) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_109] - PartitionCols:_col0 - Group By Operator [GBY_108] (rows=99586 width=212) - Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1 - Merge Join Operator [MERGEJOIN_305] (rows=69268204 width=211) - Conds:RS_104._col0=RS_105._col2(Inner),Output:["_col1","_col7"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_104] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_294] (rows=34340 width=104) - Conds:RS_323._col1=RS_329._col0(Inner),Output:["_col0","_col1"] - <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_323] - PartitionCols:_col1 - Select Operator [SEL_322] (rows=462000 width=104) - 
Output:["_col0","_col1"] - Filter Operator [FIL_321] (rows=462000 width=104) - predicate:(i_item_id is not null and i_item_sk is not null) - TableScan [TS_0] (rows=462000 width=104) - default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] - <-Reducer 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_329] - PartitionCols:_col0 - Group By Operator [GBY_328] (rows=23100 width=100) - Output:["_col0"],keys:KEY._col0 - <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_327] - PartitionCols:_col0 - Group By Operator [GBY_326] (rows=23100 width=100) - Output:["_col0"],keys:i_item_id - Select Operator [SEL_325] (rows=46200 width=190) - Output:["i_item_id"] - Filter Operator [FIL_324] (rows=46200 width=190) - predicate:((i_category = 'Children') and i_item_id is not null) - TableScan [TS_3] (rows=462000 width=190) - default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_id","i_category"] - <-Reducer 23 [SIMPLE_EDGE] - SHUFFLE [RS_105] - PartitionCols:_col2 - Select Operator [SEL_100] (rows=143931246 width=115) - Output:["_col2","_col4"] - Merge Join Operator [MERGEJOIN_302] (rows=143931246 width=115) - Conds:RS_97._col2=RS_353._col0(Inner),Output:["_col1","_col3"] - <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_353] - PartitionCols:_col0 - Select Operator [SEL_350] (rows=8000000 width=4) - Output:["_col0"] - Filter Operator [FIL_349] (rows=8000000 width=112) - predicate:((ca_gmt_offset = -6) and ca_address_sk is not null) - TableScan [TS_16] (rows=40000000 width=112) - default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_gmt_offset"] - <-Reducer 22 [SIMPLE_EDGE] - SHUFFLE [RS_97] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_301] (rows=143931246 width=119) - Conds:RS_376._col0=RS_336._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 17 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_336] - PartitionCols:_col0 - Select Operator [SEL_331] (rows=50 width=4) - Output:["_col0"] - Filter Operator 
[FIL_330] (rows=50 width=12) - predicate:((d_year = 1999) and (d_moy = 9) and d_date_sk is not null) - TableScan [TS_13] (rows=73049 width=12) - default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] - <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_376] - PartitionCols:_col0 - Select Operator [SEL_375] (rows=143931246 width=123) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_374] (rows=143931246 width=123) - predicate:(ws_sold_date_sk is not null and ws_bill_addr_sk is not null and ws_item_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_95_date_dim_d_date_sk_min) AND DynamicValue(RS_95_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_95_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_85] (rows=144002668 width=123) - default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_addr_sk","ws_ext_sales_price"] - <-Reducer 24 [BROADCAST_EDGE] vectorized - BROADCAST [RS_373] - Group By Operator [GBY_372] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_343] - Group By Operator [GBY_340] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_337] (rows=50 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_331] - <-Reducer 4 [CONTAINS] vectorized - Reduce Output Operator [RS_357] - PartitionCols:_col0 - Group By Operator [GBY_356] (rows=1717 width=212) - Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Top N Key Operator [TNK_355] (rows=5151 width=212) - keys:_col0,top n:100 - Group By Operator [GBY_354] (rows=1717 width=212) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - 
<-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_34] - PartitionCols:_col0 - Group By Operator [GBY_33] (rows=343400 width=212) - Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1 - Merge Join Operator [MERGEJOIN_303] (rows=252818424 width=201) - Conds:RS_29._col0=RS_30._col2(Inner),Output:["_col1","_col7"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_29] - PartitionCols:_col0 - Please refer to the previous Merge Join Operator [MERGEJOIN_294] - <-Reducer 16 [SIMPLE_EDGE] - SHUFFLE [RS_30] - PartitionCols:_col2 - Select Operator [SEL_25] (rows=525327191 width=110) - Output:["_col2","_col4"] - Merge Join Operator [MERGEJOIN_296] (rows=525327191 width=110) - Conds:RS_22._col2=RS_351._col0(Inner),Output:["_col1","_col3"] - <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_351] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_350] - <-Reducer 15 [SIMPLE_EDGE] - SHUFFLE [RS_22] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_295] (rows=525327191 width=114) - Conds:RS_348._col0=RS_332._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 17 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_332] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_331] - <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_348] - PartitionCols:_col0 - Select Operator [SEL_347] (rows=525327191 width=118) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_346] (rows=525327191 width=118) - predicate:(ss_sold_date_sk is not null and ss_addr_sk is not null and ss_item_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_20_date_dim_d_date_sk_min) AND DynamicValue(RS_20_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_20_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_10] (rows=575995635 width=118) - default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_addr_sk","ss_ext_sales_price"] - <-Reducer 18 [BROADCAST_EDGE] vectorized - BROADCAST [RS_345] 
- Group By Operator [GBY_344] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_341] - Group By Operator [GBY_338] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_333] (rows=50 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_331] - <-Reducer 9 [CONTAINS] vectorized - Reduce Output Operator [RS_371] - PartitionCols:_col0 - Group By Operator [GBY_370] (rows=1717 width=212) - Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 - Top N Key Operator [TNK_369] (rows=5151 width=212) - keys:_col0,top n:100 - Group By Operator [GBY_368] (rows=1717 width=212) - Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_71] - PartitionCols:_col0 - Group By Operator [GBY_70] (rows=195738 width=212) - Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1 - Merge Join Operator [MERGEJOIN_304] (rows=137215467 width=210) - Conds:RS_66._col0=RS_67._col3(Inner),Output:["_col1","_col7"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_66] - PartitionCols:_col0 - Please refer to the previous Merge Join Operator [MERGEJOIN_294] - <-Reducer 20 [SIMPLE_EDGE] - SHUFFLE [RS_67] - PartitionCols:_col3 - Select Operator [SEL_62] (rows=285117733 width=115) - Output:["_col3","_col4"] - Merge Join Operator [MERGEJOIN_299] (rows=285117733 width=115) - Conds:RS_59._col1=RS_352._col0(Inner),Output:["_col2","_col3"] - <-Map 25 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_352] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_350] - <-Reducer 19 [SIMPLE_EDGE] - SHUFFLE [RS_59] + SHUFFLE [RS_365] + Top N Key Operator [TNK_364] (rows=1717 width=212) + keys:_col0, _col1,top n:100 + Group By 
Operator [GBY_363] (rows=1717 width=212) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Union 5 [SIMPLE_EDGE] + <-Reducer 11 [CONTAINS] vectorized + Reduce Output Operator [RS_386] + PartitionCols:_col0 + Group By Operator [GBY_385] (rows=1717 width=212) + Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 + Top N Key Operator [TNK_384] (rows=5151 width=212) + keys:_col0,top n:100 + Group By Operator [GBY_383] (rows=1717 width=212) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_109] + PartitionCols:_col0 + Group By Operator [GBY_108] (rows=99586 width=212) + Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1 + Merge Join Operator [MERGEJOIN_310] (rows=69268204 width=211) + Conds:RS_104._col0=RS_105._col2(Inner),Output:["_col1","_col7"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_104] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_299] (rows=34340 width=104) + Conds:RS_328._col1=RS_334._col0(Inner),Output:["_col0","_col1"] + <-Map 1 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_328] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_298] (rows=285117733 width=119) - Conds:RS_367._col0=RS_334._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 17 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_334] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_331] - <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_367] + Select Operator [SEL_327] (rows=462000 width=104) + Output:["_col0","_col1"] + Filter Operator [FIL_326] (rows=462000 width=104) + predicate:(i_item_id is not null and i_item_sk is not null) + TableScan [TS_0] (rows=462000 width=104) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] + <-Reducer 13 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_334] + PartitionCols:_col0 + Group By Operator [GBY_333] (rows=23100 width=100) + Output:["_col0"],keys:KEY._col0 + <-Map 12 [SIMPLE_EDGE] 
vectorized + SHUFFLE [RS_332] PartitionCols:_col0 - Select Operator [SEL_366] (rows=285117733 width=123) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_365] (rows=285117733 width=123) - predicate:(cs_sold_date_sk is not null and cs_bill_addr_sk is not null and cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_57_date_dim_d_date_sk_min) AND DynamicValue(RS_57_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_57_date_dim_d_date_sk_bloom_filter))) - TableScan [TS_47] (rows=287989836 width=123) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_addr_sk","cs_item_sk","cs_ext_sales_price"] - <-Reducer 21 [BROADCAST_EDGE] vectorized - BROADCAST [RS_364] - Group By Operator [GBY_363] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] - <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_342] - Group By Operator [GBY_339] (rows=1 width=12) - Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_335] (rows=50 width=4) - Output:["_col0"] - Please refer to the previous Select Operator [SEL_331] + Group By Operator [GBY_331] (rows=23100 width=100) + Output:["_col0"],keys:i_item_id + Select Operator [SEL_330] (rows=46200 width=190) + Output:["i_item_id"] + Filter Operator [FIL_329] (rows=46200 width=190) + predicate:((i_category = 'Children') and i_item_id is not null) + TableScan [TS_3] (rows=462000 width=190) + default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_id","i_category"] + <-Reducer 23 [SIMPLE_EDGE] + SHUFFLE [RS_105] + PartitionCols:_col2 + Select Operator [SEL_100] (rows=143931246 width=115) + Output:["_col2","_col4"] + Merge Join Operator [MERGEJOIN_307] (rows=143931246 width=115) + 
Conds:RS_97._col2=RS_358._col0(Inner),Output:["_col1","_col3"] + <-Map 25 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_358] + PartitionCols:_col0 + Select Operator [SEL_355] (rows=8000000 width=4) + Output:["_col0"] + Filter Operator [FIL_354] (rows=8000000 width=112) + predicate:((ca_gmt_offset = -6) and ca_address_sk is not null) + TableScan [TS_16] (rows=40000000 width=112) + default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_gmt_offset"] + <-Reducer 22 [SIMPLE_EDGE] + SHUFFLE [RS_97] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_306] (rows=143931246 width=119) + Conds:RS_382._col0=RS_341._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 17 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_341] + PartitionCols:_col0 + Select Operator [SEL_336] (rows=50 width=4) + Output:["_col0"] + Filter Operator [FIL_335] (rows=50 width=12) + predicate:((d_year = 1999) and (d_moy = 9) and d_date_sk is not null) + TableScan [TS_13] (rows=73049 width=12) + default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] + <-Map 27 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_382] + PartitionCols:_col0 + Select Operator [SEL_381] (rows=143931246 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_380] (rows=143931246 width=123) + predicate:(ws_sold_date_sk is not null and ws_bill_addr_sk is not null and ws_item_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_95_date_dim_d_date_sk_min) AND DynamicValue(RS_95_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_95_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_85] (rows=144002668 width=123) + default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_bill_addr_sk","ws_ext_sales_price"] + <-Reducer 24 [BROADCAST_EDGE] vectorized + BROADCAST [RS_379] + Group By Operator [GBY_378] (rows=1 width=12) + 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_348] + Group By Operator [GBY_345] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_342] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_336] + <-Reducer 4 [CONTAINS] vectorized + Reduce Output Operator [RS_362] + PartitionCols:_col0 + Group By Operator [GBY_361] (rows=1717 width=212) + Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 + Top N Key Operator [TNK_360] (rows=5151 width=212) + keys:_col0,top n:100 + Group By Operator [GBY_359] (rows=1717 width=212) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_34] + PartitionCols:_col0 + Group By Operator [GBY_33] (rows=343400 width=212) + Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1 + Merge Join Operator [MERGEJOIN_308] (rows=252818424 width=201) + Conds:RS_29._col0=RS_30._col2(Inner),Output:["_col1","_col7"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_29] + PartitionCols:_col0 + Please refer to the previous Merge Join Operator [MERGEJOIN_299] + <-Reducer 16 [SIMPLE_EDGE] + SHUFFLE [RS_30] + PartitionCols:_col2 + Select Operator [SEL_25] (rows=525327191 width=110) + Output:["_col2","_col4"] + Merge Join Operator [MERGEJOIN_301] (rows=525327191 width=110) + Conds:RS_22._col2=RS_356._col0(Inner),Output:["_col1","_col3"] + <-Map 25 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_356] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_355] + <-Reducer 15 [SIMPLE_EDGE] + SHUFFLE [RS_22] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_300] (rows=525327191 width=114) + Conds:RS_353._col0=RS_337._col0(Inner),Output:["_col1","_col2","_col3"] + 
<-Map 17 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_337] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_336] + <-Map 14 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_353] + PartitionCols:_col0 + Select Operator [SEL_352] (rows=525327191 width=118) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_351] (rows=525327191 width=118) + predicate:(ss_sold_date_sk is not null and ss_addr_sk is not null and ss_item_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_20_date_dim_d_date_sk_min) AND DynamicValue(RS_20_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_20_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_10] (rows=575995635 width=118) + default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_addr_sk","ss_ext_sales_price"] + <-Reducer 18 [BROADCAST_EDGE] vectorized + BROADCAST [RS_350] + Group By Operator [GBY_349] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_346] + Group By Operator [GBY_343] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_338] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_336] + <-Reducer 9 [CONTAINS] vectorized + Reduce Output Operator [RS_377] + PartitionCols:_col0 + Group By Operator [GBY_376] (rows=1717 width=212) + Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 + Top N Key Operator [TNK_375] (rows=5151 width=212) + keys:_col0,top n:100 + Group By Operator [GBY_374] (rows=1717 width=212) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_71] + PartitionCols:_col0 + Group By Operator [GBY_70] 
(rows=195738 width=212) + Output:["_col0","_col1"],aggregations:["sum(_col7)"],keys:_col1 + Merge Join Operator [MERGEJOIN_309] (rows=137215467 width=210) + Conds:RS_66._col0=RS_67._col3(Inner),Output:["_col1","_col7"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_66] + PartitionCols:_col0 + Please refer to the previous Merge Join Operator [MERGEJOIN_299] + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_67] + PartitionCols:_col3 + Select Operator [SEL_62] (rows=285117733 width=115) + Output:["_col3","_col4"] + Merge Join Operator [MERGEJOIN_304] (rows=285117733 width=115) + Conds:RS_59._col1=RS_357._col0(Inner),Output:["_col2","_col3"] + <-Map 25 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_357] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_355] + <-Reducer 19 [SIMPLE_EDGE] + SHUFFLE [RS_59] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_303] (rows=285117733 width=119) + Conds:RS_373._col0=RS_339._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 17 [SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_339] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_336] + <-Map 26 [SIMPLE_EDGE] vectorized + SHUFFLE [RS_373] + PartitionCols:_col0 + Select Operator [SEL_372] (rows=285117733 width=123) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_371] (rows=285117733 width=123) + predicate:(cs_sold_date_sk is not null and cs_bill_addr_sk is not null and cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_57_date_dim_d_date_sk_min) AND DynamicValue(RS_57_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_57_date_dim_d_date_sk_bloom_filter))) + TableScan [TS_47] (rows=287989836 width=123) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_bill_addr_sk","cs_item_sk","cs_ext_sales_price"] + <-Reducer 21 [BROADCAST_EDGE] vectorized + BROADCAST [RS_370] + Group By Operator [GBY_369] (rows=1 width=12) + 
Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] + <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized + PARTITION_ONLY_SHUFFLE [RS_347] + Group By Operator [GBY_344] (rows=1 width=12) + Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] + Select Operator [SEL_340] (rows=50 width=4) + Output:["_col0"] + Please refer to the previous Select Operator [SEL_336] diff --git ql/src/test/results/clientpositive/perf/tez/query66.q.out ql/src/test/results/clientpositive/perf/tez/query66.q.out index 7ddcc21f92..0c4b12b72f 100644 --- ql/src/test/results/clientpositive/perf/tez/query66.q.out +++ ql/src/test/results/clientpositive/perf/tez/query66.q.out @@ -479,28 +479,28 @@ Stage-0 limit:-1 Stage-1 Reducer 9 vectorized - File Output Operator [FS_254] - Select Operator [SEL_253] (rows=100 width=4614) + File Output Operator [FS_259] + Select Operator [SEL_258] (rows=100 width=4614) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41","_col42","_col43"] - Limit [LIM_252] (rows=100 width=4510) + Limit [LIM_257] (rows=100 width=4510) Number of rows:100 - Select Operator [SEL_251] (rows=2423925 width=4510) + Select Operator [SEL_256] (rows=2423925 width=4510) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"] <-Reducer 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_250] - Group By Operator [GBY_249] (rows=2423925 width=4510) + SHUFFLE [RS_255] + Group By Operator [GBY_254] (rows=2423925 width=4510) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)","sum(VALUE._col7)","sum(VALUE._col8)","sum(VALUE._col9)","sum(VALUE._col10)","sum(VALUE._col11)","sum(VALUE._col12)","sum(VALUE._col13)","sum(VALUE._col14)","sum(VALUE._col15)","sum(VALUE._col16)","sum(VALUE._col17)","sum(VALUE._col18)","sum(VALUE._col19)","sum(VALUE._col20)","sum(VALUE._col21)","sum(VALUE._col22)","sum(VALUE._col23)","sum(VALUE._col24)","sum(VALUE._col25)","sum(VALUE._col26)","sum(VALUE._col27)","sum(VALUE._col28)","sum(VALUE._col29)","sum(VALUE._col30)","sum(VALUE._col31)","sum(VALUE._col32)","sum(VALUE._col33)","sum(VALUE._col34)","sum(VALUE._col35)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 <-Union 7 [SIMPLE_EDGE] <-Reducer 15 [CONTAINS] vectorized - Reduce Output Operator [RS_264] + Reduce Output Operator [RS_269] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 - Group By Operator [GBY_263] (rows=2513727 width=4510) + Group By Operator 
[GBY_268] (rows=2513727 width=4510) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)","sum(_col9)","sum(_col10)","sum(_col11)","sum(_col12)","sum(_col13)","sum(_col14)","sum(_col15)","sum(_col16)","sum(_col17)","sum(_col18)","sum(_col19)","sum(_col20)","sum(_col21)","sum(_col22)","sum(_col23)","sum(_col24)","sum(_col25)","sum(_col26)","sum(_col27)","sum(_col28)","sum(_col29)","sum(_col30)","sum(_col31)","sum(_col32)","sum(_col33)","sum(_col34)","sum(_col35)","sum(_col36)","sum(_col37)","sum(_col38)","sum(_col39)","sum(_col40)","sum(_col41)"],keys:_col0, _col1, _col2, _col3, _col4, _col5 - Top N Key Operator [TNK_262] (rows=2513727 width=3166) - keys:_col0, _col1, _col2, _col3, _col4, _col5,top n:100 - Select Operator [SEL_261] (rows=2513727 width=3166) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"] - Group By Operator [GBY_260] (rows=2513700 width=3166) + Select Operator [SEL_267] (rows=2513727 width=3166) + 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"] + Top N Key Operator [TNK_266] (rows=2513727 width=3166) + keys:_col0, _col1, _col2, _col3, _col4, _col5,top n:100 + Group By Operator [GBY_265] (rows=2513700 width=3166) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)","sum(VALUE._col7)","sum(VALUE._col8)","sum(VALUE._col9)","sum(VALUE._col10)","sum(VALUE._col11)","sum(VALUE._col12)","sum(VALUE._col13)","sum(VALUE._col14)","sum(VALUE._col15)","sum(VALUE._col16)","sum(VALUE._col17)","sum(VALUE._col18)","sum(VALUE._col19)","sum(VALUE._col20)","sum(VALUE._col21)","sum(VALUE._col22)","sum(VALUE._col23)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_63] @@ -509,89 +509,89 @@ Stage-0 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)","sum(_col9)","sum(_col10)","sum(_col11)","sum(_col12)","sum(_col13)","sum(_col14)","sum(_col15)","sum(_col16)","sum(_col17)","sum(_col18)","sum(_col19)","sum(_col20)","sum(_col21)","sum(_col22)","sum(_col23)","sum(_col24)","sum(_col25)","sum(_col26)","sum(_col27)","sum(_col28)","sum(_col29)"],keys:_col0, _col1, _col2, _col3, _col4, _col5 Select Operator [SEL_60] (rows=15681803 width=750) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"] - Merge Join Operator [MERGEJOIN_204] (rows=15681803 width=750) - Conds:RS_57._col3=RS_243._col0(Inner),Output:["_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col22","_col23","_col24","_col25","_col26","_col27"] + Merge Join Operator [MERGEJOIN_209] (rows=15681803 width=750) + Conds:RS_57._col3=RS_248._col0(Inner),Output:["_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col22","_col23","_col24","_col25","_col26","_col27"] <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_243] + SHUFFLE [RS_248] PartitionCols:_col0 - Select Operator [SEL_241] (rows=27 width=482) + Select Operator [SEL_246] (rows=27 width=482) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_240] (rows=27 width=482) + Filter Operator [FIL_245] (rows=27 width=482) predicate:w_warehouse_sk is not null TableScan [TS_12] (rows=27 width=482) 
default@warehouse,warehouse,Tbl:COMPLETE,Col:COMPLETE,Output:["w_warehouse_sk","w_warehouse_name","w_warehouse_sq_ft","w_city","w_county","w_state","w_country"] <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_57] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_203] (rows=15681803 width=275) - Conds:RS_54._col2=RS_221._col0(Inner),Output:["_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"] + Merge Join Operator [MERGEJOIN_208] (rows=15681803 width=275) + Conds:RS_54._col2=RS_226._col0(Inner),Output:["_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"] <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_221] + SHUFFLE [RS_226] PartitionCols:_col0 - Select Operator [SEL_218] (rows=1 width=4) + Select Operator [SEL_223] (rows=1 width=4) Output:["_col0"] - Filter Operator [FIL_217] (rows=1 width=88) + Filter Operator [FIL_222] (rows=1 width=88) predicate:((sm_carrier) IN ('DIAMOND', 'AIRBORNE') and sm_ship_mode_sk is not null) TableScan [TS_9] (rows=1 width=88) default@ship_mode,ship_mode,Tbl:COMPLETE,Col:COMPLETE,Output:["sm_ship_mode_sk","sm_carrier"] <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_54] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_202] (rows=282272460 width=279) - Conds:RS_51._col0=RS_239._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"] + Merge Join Operator [MERGEJOIN_207] (rows=282272460 width=279) + Conds:RS_51._col0=RS_244._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"] <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_239] + SHUFFLE [RS_244] PartitionCols:_col0 - Select Operator [SEL_237] (rows=652 width=52) + Select Operator [SEL_242] (rows=652 width=52) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] - Filter Operator [FIL_236] (rows=652 width=12) + Filter Operator [FIL_241] (rows=652 width=12) predicate:((d_year = 2002) and d_date_sk is not null) TableScan [TS_6] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_51] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_201] (rows=282272460 width=235) - Conds:RS_259._col1=RS_235._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_206] (rows=282272460 width=235) + Conds:RS_264._col1=RS_240._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_235] + SHUFFLE [RS_240] PartitionCols:_col0 - Select Operator [SEL_233] (rows=33426 width=4) + Select Operator [SEL_238] (rows=33426 width=4) Output:["_col0"] - Filter Operator [FIL_232] (rows=33426 width=8) + Filter Operator [FIL_237] (rows=33426 width=8) predicate:(t_time BETWEEN 49530 AND 78330 and t_time_sk is not null) TableScan [TS_3] (rows=86400 width=8) default@time_dim,time_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["t_time_sk","t_time"] <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_259] + SHUFFLE [RS_264] PartitionCols:_col1 - Select Operator [SEL_258] (rows=282272460 width=239) + Select Operator [SEL_263] (rows=282272460 width=239) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_257] (rows=282272460 width=243) + Filter Operator [FIL_262] (rows=282272460 width=243) predicate:(cs_warehouse_sk is not null and cs_sold_date_sk is not null and cs_sold_time_sk is not null and cs_ship_mode_sk is not null and cs_ship_mode_sk BETWEEN DynamicValue(RS_55_ship_mode_sm_ship_mode_sk_min) AND DynamicValue(RS_55_ship_mode_sm_ship_mode_sk_max) and in_bloom_filter(cs_ship_mode_sk, DynamicValue(RS_55_ship_mode_sm_ship_mode_sk_bloom_filter))) 
TableScan [TS_33] (rows=287989836 width=243) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_sold_time_sk","cs_ship_mode_sk","cs_warehouse_sk","cs_quantity","cs_ext_sales_price","cs_net_paid_inc_ship_tax"] <-Reducer 19 [BROADCAST_EDGE] vectorized - BROADCAST [RS_256] - Group By Operator [GBY_255] (rows=1 width=12) + BROADCAST [RS_261] + Group By Operator [GBY_260] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_226] - Group By Operator [GBY_224] (rows=1 width=12) + SHUFFLE [RS_231] + Group By Operator [GBY_229] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_222] (rows=1 width=4) + Select Operator [SEL_227] (rows=1 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_218] + Please refer to the previous Select Operator [SEL_223] <-Reducer 6 [CONTAINS] vectorized - Reduce Output Operator [RS_248] + Reduce Output Operator [RS_253] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 - Group By Operator [GBY_247] (rows=2513727 width=4510) + Group By Operator [GBY_252] (rows=2513727 width=4510) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)","sum(_col9)","sum(_col10)","sum(_col11)","sum(_col12)","sum(_col13)","sum(_col14)","sum(_col15)","sum(_col16)","sum(_col17)","sum(_col18)","sum(_col19)","sum(_col20)","sum(_col21)","sum(_col22)","sum(_col23)","sum(_col24)","sum(_col25)","sum(_col26)","sum(_col27)","sum(_col28)","sum(_col29)","sum(_col30)","sum(_col31)","sum(_col32)","sum(_col33)","sum(_col34)","sum(_col35)","sum(_col36)","sum(_col37)","sum(_col38)","sum(_col39)","sum(_col40)","sum(_col41)"],keys:_col0, _col1, _col2, _col3, _col4, _col5 - Top N Key Operator [TNK_246] (rows=2513727 width=3166) - keys:_col0, _col1, _col2, _col3, _col4, _col5,top n:100 - Select Operator [SEL_245] (rows=2513727 width=3166) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"] - Group By Operator [GBY_244] (rows=27 width=3166) + Select Operator [SEL_251] (rows=2513727 width=3166) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41"] + Top N Key Operator 
[TNK_250] (rows=2513727 width=3166) + keys:_col0, _col1, _col2, _col3, _col4, _col5,top n:100 + Group By Operator [GBY_249] (rows=27 width=3166) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)","sum(VALUE._col7)","sum(VALUE._col8)","sum(VALUE._col9)","sum(VALUE._col10)","sum(VALUE._col11)","sum(VALUE._col12)","sum(VALUE._col13)","sum(VALUE._col14)","sum(VALUE._col15)","sum(VALUE._col16)","sum(VALUE._col17)","sum(VALUE._col18)","sum(VALUE._col19)","sum(VALUE._col20)","sum(VALUE._col21)","sum(VALUE._col22)","sum(VALUE._col23)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_30] @@ -600,57 +600,57 @@ Stage-0 Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"],aggregations:["sum(_col6)","sum(_col7)","sum(_col8)","sum(_col9)","sum(_col10)","sum(_col11)","sum(_col12)","sum(_col13)","sum(_col14)","sum(_col15)","sum(_col16)","sum(_col17)","sum(_col18)","sum(_col19)","sum(_col20)","sum(_col21)","sum(_col22)","sum(_col23)","sum(_col24)","sum(_col25)","sum(_col26)","sum(_col27)","sum(_col28)","sum(_col29)"],keys:_col0, _col1, _col2, _col3, _col4, _col5 Select Operator [SEL_27] (rows=7992175 width=750) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29"] - Merge Join Operator [MERGEJOIN_200] (rows=7992175 width=750) - Conds:RS_24._col3=RS_242._col0(Inner),Output:["_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col22","_col23","_col24","_col25","_col26","_col27"] + Merge Join Operator [MERGEJOIN_205] (rows=7992175 width=750) + Conds:RS_24._col3=RS_247._col0(Inner),Output:["_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col22","_col23","_col24","_col25","_col26","_col27"] <-Map 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_242] + SHUFFLE [RS_247] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_241] + Please refer to the previous Select Operator [SEL_246] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_199] (rows=7992175 width=275) - Conds:RS_21._col2=RS_219._col0(Inner),Output:["_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"] + Merge Join Operator [MERGEJOIN_204] (rows=7992175 width=275) + Conds:RS_21._col2=RS_224._col0(Inner),Output:["_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"] <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_219] + SHUFFLE [RS_224] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_218] + Please refer to the previous Select Operator [SEL_223] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_198] (rows=143859154 width=279) - 
Conds:RS_18._col0=RS_238._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"] + Merge Join Operator [MERGEJOIN_203] (rows=143859154 width=279) + Conds:RS_18._col0=RS_243._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19"] <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_238] + SHUFFLE [RS_243] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_237] + Please refer to the previous Select Operator [SEL_242] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_197] (rows=143859154 width=235) - Conds:RS_231._col1=RS_234._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_202] (rows=143859154 width=235) + Conds:RS_236._col1=RS_239._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_234] + SHUFFLE [RS_239] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_233] + Please refer to the previous Select Operator [SEL_238] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_231] + SHUFFLE [RS_236] PartitionCols:_col1 - Select Operator [SEL_230] (rows=143859154 width=239) + Select Operator [SEL_235] (rows=143859154 width=239) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_229] (rows=143859154 width=243) + Filter Operator [FIL_234] (rows=143859154 width=243) predicate:(ws_sold_time_sk is not null and ws_warehouse_sk is not null and ws_sold_date_sk is not null and ws_ship_mode_sk is not null and ws_ship_mode_sk BETWEEN DynamicValue(RS_22_ship_mode_sm_ship_mode_sk_min) AND DynamicValue(RS_22_ship_mode_sm_ship_mode_sk_max) and in_bloom_filter(ws_ship_mode_sk, DynamicValue(RS_22_ship_mode_sm_ship_mode_sk_bloom_filter))) TableScan [TS_0] (rows=144002668 width=243) 
default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_sold_time_sk","ws_ship_mode_sk","ws_warehouse_sk","ws_quantity","ws_sales_price","ws_net_paid_inc_tax"] <-Reducer 18 [BROADCAST_EDGE] vectorized - BROADCAST [RS_228] - Group By Operator [GBY_227] (rows=1 width=12) + BROADCAST [RS_233] + Group By Operator [GBY_232] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 17 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_225] - Group By Operator [GBY_223] (rows=1 width=12) + SHUFFLE [RS_230] + Group By Operator [GBY_228] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_220] (rows=1 width=4) + Select Operator [SEL_225] (rows=1 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_218] + Please refer to the previous Select Operator [SEL_223] diff --git ql/src/test/results/clientpositive/perf/tez/query69.q.out ql/src/test/results/clientpositive/perf/tez/query69.q.out index d11b5494e0..16bf1f599c 100644 --- ql/src/test/results/clientpositive/perf/tez/query69.q.out +++ ql/src/test/results/clientpositive/perf/tez/query69.q.out @@ -133,30 +133,30 @@ Stage-0 limit:100 Stage-1 Reducer 8 vectorized - File Output Operator [FS_231] - Limit [LIM_230] (rows=1 width=383) + File Output Operator [FS_236] + Limit [LIM_235] (rows=1 width=383) Number of rows:100 - Select Operator [SEL_229] (rows=1 width=383) + Select Operator [SEL_234] (rows=1 width=383) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Reducer 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_228] - Select Operator [SEL_227] (rows=1 width=383) + SHUFFLE [RS_233] + Select Operator [SEL_232] (rows=1 width=383) Output:["_col0","_col1","_col2","_col3","_col4","_col6"] - Group By Operator [GBY_226] (rows=1 width=367) + Group By Operator 
[GBY_231] (rows=1 width=367) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_69] PartitionCols:_col0, _col1, _col2, _col3, _col4 Group By Operator [GBY_68] (rows=1 width=367) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count()"],keys:_col6, _col7, _col8, _col9, _col10 - Top N Key Operator [TNK_105] (rows=1 width=363) - keys:_col6, _col7, _col8, _col9, _col10,top n:100 - Select Operator [SEL_67] (rows=1 width=363) - Output:["_col6","_col7","_col8","_col9","_col10"] + Select Operator [SEL_67] (rows=1 width=363) + Output:["_col6","_col7","_col8","_col9","_col10"] + Top N Key Operator [TNK_108] (rows=1 width=363) + keys:_col6, _col7, _col8, _col9, _col10,top n:100 Filter Operator [FIL_66] (rows=1 width=363) predicate:_col13 is null - Merge Join Operator [MERGEJOIN_184] (rows=1401496 width=363) - Conds:RS_63._col0=RS_225._col1(Left Outer),Output:["_col6","_col7","_col8","_col9","_col10","_col13"] + Merge Join Operator [MERGEJOIN_189] (rows=1401496 width=363) + Conds:RS_63._col0=RS_230._col1(Left Outer),Output:["_col6","_col7","_col8","_col9","_col10","_col13"] <-Reducer 5 [SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_63] PartitionCols:_col0 @@ -164,55 +164,55 @@ Stage-0 Output:["_col0","_col6","_col7","_col8","_col9","_col10"] Filter Operator [FIL_47] (rows=1 width=367) predicate:_col11 is null - Merge Join Operator [MERGEJOIN_183] (rows=1414922 width=367) - Conds:RS_44._col0=RS_217._col1(Left Outer),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11"] + Merge Join Operator [MERGEJOIN_188] (rows=1414922 width=367) + Conds:RS_44._col0=RS_222._col1(Left Outer),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11"] <-Reducer 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_217] + SHUFFLE [RS_222] PartitionCols:_col1 - Select Operator [SEL_216] (rows=1414922 width=7) + Select Operator 
[SEL_221] (rows=1414922 width=7) Output:["_col0","_col1"] - Group By Operator [GBY_215] (rows=1414922 width=3) + Group By Operator [GBY_220] (rows=1414922 width=3) Output:["_col0"],keys:KEY._col0 <-Reducer 16 [SIMPLE_EDGE] SHUFFLE [RS_30] PartitionCols:_col0 Group By Operator [GBY_29] (rows=143930993 width=3) Output:["_col0"],keys:_col1 - Merge Join Operator [MERGEJOIN_180] (rows=143930993 width=3) - Conds:RS_214._col0=RS_198._col0(Inner),Output:["_col1"] + Merge Join Operator [MERGEJOIN_185] (rows=143930993 width=3) + Conds:RS_219._col0=RS_203._col0(Inner),Output:["_col1"] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_198] + SHUFFLE [RS_203] PartitionCols:_col0 - Select Operator [SEL_195] (rows=150 width=4) + Select Operator [SEL_200] (rows=150 width=4) Output:["_col0"] - Filter Operator [FIL_194] (rows=150 width=12) + Filter Operator [FIL_199] (rows=150 width=12) predicate:((d_year = 1999) and d_moy BETWEEN 1 AND 3 and d_date_sk is not null) TableScan [TS_12] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"] <-Map 21 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_214] + SHUFFLE [RS_219] PartitionCols:_col0 - Select Operator [SEL_213] (rows=143930993 width=7) + Select Operator [SEL_218] (rows=143930993 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_212] (rows=143930993 width=7) + Filter Operator [FIL_217] (rows=143930993 width=7) predicate:(ws_bill_customer_sk is not null and ws_sold_date_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_26_date_dim_d_date_sk_min) AND DynamicValue(RS_26_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_26_date_dim_d_date_sk_bloom_filter))) TableScan [TS_19] (rows=144002668 width=7) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk"] <-Reducer 18 [BROADCAST_EDGE] vectorized - BROADCAST [RS_211] - Group By Operator [GBY_210] (rows=1 width=12) + BROADCAST [RS_216] + Group By Operator 
[GBY_215] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_204] - Group By Operator [GBY_202] (rows=1 width=12) + SHUFFLE [RS_209] + Group By Operator [GBY_207] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_199] (rows=150 width=4) + Select Operator [SEL_204] (rows=150 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_195] + Please refer to the previous Select Operator [SEL_200] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_44] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_182] (rows=525327388 width=363) + Merge Join Operator [MERGEJOIN_187] (rows=525327388 width=363) Conds:RS_41._col0=RS_42._col0(Left Semi),Output:["_col0","_col6","_col7","_col8","_col9","_col10"] <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_42] @@ -221,105 +221,105 @@ Stage-0 Output:["_col0"],keys:_col0 Select Operator [SEL_18] (rows=525327388 width=3) Output:["_col0"] - Merge Join Operator [MERGEJOIN_179] (rows=525327388 width=3) - Conds:RS_209._col0=RS_196._col0(Inner),Output:["_col1"] + Merge Join Operator [MERGEJOIN_184] (rows=525327388 width=3) + Conds:RS_214._col0=RS_201._col0(Inner),Output:["_col1"] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_196] + SHUFFLE [RS_201] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_195] + Please refer to the previous Select Operator [SEL_200] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_209] + SHUFFLE [RS_214] PartitionCols:_col0 - Select Operator [SEL_208] (rows=525327388 width=7) + Select Operator [SEL_213] (rows=525327388 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_207] (rows=525327388 width=7) + Filter Operator [FIL_212] (rows=525327388 width=7) predicate:(ss_sold_date_sk is not null and ss_customer_sk is not null and 
ss_sold_date_sk BETWEEN DynamicValue(RS_16_date_dim_d_date_sk_min) AND DynamicValue(RS_16_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_16_date_dim_d_date_sk_bloom_filter))) TableScan [TS_9] (rows=575995635 width=7) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk"] <-Reducer 15 [BROADCAST_EDGE] vectorized - BROADCAST [RS_206] - Group By Operator [GBY_205] (rows=1 width=12) + BROADCAST [RS_211] + Group By Operator [GBY_210] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 14 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_203] - Group By Operator [GBY_201] (rows=1 width=12) + SHUFFLE [RS_208] + Group By Operator [GBY_206] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_197] (rows=150 width=4) + Select Operator [SEL_202] (rows=150 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_195] + Please refer to the previous Select Operator [SEL_200] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_41] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_178] (rows=4605476 width=363) - Conds:RS_36._col1=RS_193._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10"] + Merge Join Operator [MERGEJOIN_183] (rows=4605476 width=363) + Conds:RS_36._col1=RS_198._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10"] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_193] + SHUFFLE [RS_198] PartitionCols:_col0 - Select Operator [SEL_192] (rows=1861800 width=363) + Select Operator [SEL_197] (rows=1861800 width=363) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_191] (rows=1861800 width=363) + Filter Operator [FIL_196] (rows=1861800 width=363) predicate:cd_demo_sk is not null TableScan [TS_6] (rows=1861800 
width=363) default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status","cd_purchase_estimate","cd_credit_rating"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_36] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_177] (rows=4541258 width=5) - Conds:RS_187._col2=RS_190._col0(Inner),Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_182] (rows=4541258 width=5) + Conds:RS_192._col2=RS_195._col0(Inner),Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_187] + SHUFFLE [RS_192] PartitionCols:_col2 - Select Operator [SEL_186] (rows=77201384 width=11) + Select Operator [SEL_191] (rows=77201384 width=11) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_185] (rows=77201384 width=11) + Filter Operator [FIL_190] (rows=77201384 width=11) predicate:(c_current_cdemo_sk is not null and c_current_addr_sk is not null and c_customer_sk is not null) TableScan [TS_0] (rows=80000000 width=11) default@customer,c,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_190] + SHUFFLE [RS_195] PartitionCols:_col0 - Select Operator [SEL_189] (rows=2352941 width=90) + Select Operator [SEL_194] (rows=2352941 width=90) Output:["_col0"] - Filter Operator [FIL_188] (rows=2352941 width=90) + Filter Operator [FIL_193] (rows=2352941 width=90) predicate:((ca_state) IN ('CO', 'IL', 'MN') and ca_address_sk is not null) TableScan [TS_3] (rows=40000000 width=90) default@customer_address,ca,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_state"] <-Reducer 20 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_225] + SHUFFLE [RS_230] PartitionCols:_col1 - Select Operator [SEL_224] (rows=1401496 width=7) + Select Operator [SEL_229] (rows=1401496 width=7) Output:["_col0","_col1"] - Group By Operator [GBY_223] (rows=1401496 width=3) + Group By Operator [GBY_228] (rows=1401496 width=3) 
Output:["_col0"],keys:KEY._col0 <-Reducer 19 [SIMPLE_EDGE] SHUFFLE [RS_60] PartitionCols:_col0 Group By Operator [GBY_59] (rows=285115246 width=3) Output:["_col0"],keys:_col1 - Merge Join Operator [MERGEJOIN_181] (rows=285115246 width=3) - Conds:RS_222._col0=RS_200._col0(Inner),Output:["_col1"] + Merge Join Operator [MERGEJOIN_186] (rows=285115246 width=3) + Conds:RS_227._col0=RS_205._col0(Inner),Output:["_col1"] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_200] + SHUFFLE [RS_205] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_195] + Please refer to the previous Select Operator [SEL_200] <-Map 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_222] + SHUFFLE [RS_227] PartitionCols:_col0 - Select Operator [SEL_221] (rows=285115246 width=7) + Select Operator [SEL_226] (rows=285115246 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_220] (rows=285115246 width=7) + Filter Operator [FIL_225] (rows=285115246 width=7) predicate:(cs_ship_customer_sk is not null and cs_sold_date_sk is not null and cs_ship_customer_sk BETWEEN DynamicValue(RS_63_c_c_customer_sk_min) AND DynamicValue(RS_63_c_c_customer_sk_max) and in_bloom_filter(cs_ship_customer_sk, DynamicValue(RS_63_c_c_customer_sk_bloom_filter))) TableScan [TS_49] (rows=287989836 width=7) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_ship_customer_sk"] <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_219] - Group By Operator [GBY_218] (rows=1 width=12) + BROADCAST [RS_224] + Group By Operator [GBY_223] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Reducer 5 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_168] - Group By Operator [GBY_167] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_173] + Group By Operator [GBY_172] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, 
expectedEntries=1000000)"] - Select Operator [SEL_166] (rows=1 width=4) + Select Operator [SEL_171] (rows=1 width=4) Output:["_col0"] Please refer to the previous Select Operator [SEL_48] diff --git ql/src/test/results/clientpositive/perf/tez/query7.q.out ql/src/test/results/clientpositive/perf/tez/query7.q.out index c17ec8aeb9..2679b52b8e 100644 --- ql/src/test/results/clientpositive/perf/tez/query7.q.out +++ ql/src/test/results/clientpositive/perf/tez/query7.q.out @@ -67,95 +67,95 @@ Stage-0 limit:100 Stage-1 Reducer 7 vectorized - File Output Operator [FS_125] - Limit [LIM_124] (rows=100 width=444) + File Output Operator [FS_130] + Limit [LIM_129] (rows=100 width=444) Number of rows:100 - Select Operator [SEL_123] (rows=310774 width=444) + Select Operator [SEL_128] (rows=310774 width=444) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_122] - Select Operator [SEL_121] (rows=310774 width=444) + SHUFFLE [RS_127] + Select Operator [SEL_126] (rows=310774 width=444) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_120] (rows=310774 width=476) + Group By Operator [GBY_125] (rows=310774 width=476) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)","sum(VALUE._col6)","count(VALUE._col7)"],keys:KEY._col0 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_29] PartitionCols:_col0 Group By Operator [GBY_28] (rows=462000 width=476) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(_col4)","count(_col4)","sum(_col5)","count(_col5)","sum(_col7)","count(_col7)","sum(_col6)","count(_col6)"],keys:_col12 - Top N Key Operator [TNK_55] (rows=4635977 width=100) + Top N Key Operator [TNK_58] (rows=4635977 width=100) keys:_col12,top n:100 - Merge Join Operator [MERGEJOIN_99] (rows=4635977 width=100) - 
Conds:RS_24._col1=RS_119._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col12"] + Merge Join Operator [MERGEJOIN_104] (rows=4635977 width=100) + Conds:RS_24._col1=RS_124._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col12"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_119] + SHUFFLE [RS_124] PartitionCols:_col0 - Select Operator [SEL_118] (rows=462000 width=104) + Select Operator [SEL_123] (rows=462000 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_117] (rows=462000 width=104) + Filter Operator [FIL_122] (rows=462000 width=104) predicate:i_item_sk is not null TableScan [TS_12] (rows=462000 width=104) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_98] (rows=4635977 width=4) - Conds:RS_21._col3=RS_116._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_103] (rows=4635977 width=4) + Conds:RS_21._col3=RS_121._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7"] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_116] + SHUFFLE [RS_121] PartitionCols:_col0 - Select Operator [SEL_115] (rows=2300 width=4) + Select Operator [SEL_120] (rows=2300 width=4) Output:["_col0"] - Filter Operator [FIL_114] (rows=2300 width=174) + Filter Operator [FIL_119] (rows=2300 width=174) predicate:(((p_channel_email = 'N') or (p_channel_event = 'N')) and p_promo_sk is not null) TableScan [TS_9] (rows=2300 width=174) default@promotion,promotion,Tbl:COMPLETE,Col:COMPLETE,Output:["p_promo_sk","p_channel_email","p_channel_event"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_97] (rows=4635977 width=4) - Conds:RS_18._col0=RS_113._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_102] (rows=4635977 width=4) + 
Conds:RS_18._col0=RS_118._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_113] + SHUFFLE [RS_118] PartitionCols:_col0 - Select Operator [SEL_112] (rows=652 width=4) + Select Operator [SEL_117] (rows=652 width=4) Output:["_col0"] - Filter Operator [FIL_111] (rows=652 width=8) + Filter Operator [FIL_116] (rows=652 width=8) predicate:((d_year = 1998) and d_date_sk is not null) TableScan [TS_6] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_96] (rows=4635977 width=4) - Conds:RS_110._col2=RS_102._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7"] + Merge Join Operator [MERGEJOIN_101] (rows=4635977 width=4) + Conds:RS_115._col2=RS_107._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7"] <-Map 8 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_102] + PARTITION_ONLY_SHUFFLE [RS_107] PartitionCols:_col0 - Select Operator [SEL_101] (rows=14776 width=4) + Select Operator [SEL_106] (rows=14776 width=4) Output:["_col0"] - Filter Operator [FIL_100] (rows=14776 width=268) + Filter Operator [FIL_105] (rows=14776 width=268) predicate:((cd_marital_status = 'W') and (cd_education_status = 'Primary') and (cd_gender = 'F') and cd_demo_sk is not null) TableScan [TS_3] (rows=1861800 width=268) default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_110] + SHUFFLE [RS_115] PartitionCols:_col2 - Select Operator [SEL_109] (rows=501686735 width=340) + Select Operator [SEL_114] (rows=501686735 width=340) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_108] (rows=501686735 width=340) + Filter Operator [FIL_113] (rows=501686735 
width=340) predicate:(ss_cdemo_sk is not null and ss_sold_date_sk is not null and ss_promo_sk is not null and ss_item_sk is not null and ss_cdemo_sk BETWEEN DynamicValue(RS_16_customer_demographics_cd_demo_sk_min) AND DynamicValue(RS_16_customer_demographics_cd_demo_sk_max) and in_bloom_filter(ss_cdemo_sk, DynamicValue(RS_16_customer_demographics_cd_demo_sk_bloom_filter))) TableScan [TS_0] (rows=575995635 width=340) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_cdemo_sk","ss_promo_sk","ss_quantity","ss_list_price","ss_sales_price","ss_coupon_amt"] <-Reducer 9 [BROADCAST_EDGE] vectorized - BROADCAST [RS_107] - Group By Operator [GBY_106] (rows=1 width=12) + BROADCAST [RS_112] + Group By Operator [GBY_111] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 8 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_105] - Group By Operator [GBY_104] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_110] + Group By Operator [GBY_109] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_103] (rows=14776 width=4) + Select Operator [SEL_108] (rows=14776 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_101] + Please refer to the previous Select Operator [SEL_106] diff --git ql/src/test/results/clientpositive/perf/tez/query76.q.out ql/src/test/results/clientpositive/perf/tez/query76.q.out index c0d60e88cc..deecf746d3 100644 --- ql/src/test/results/clientpositive/perf/tez/query76.q.out +++ ql/src/test/results/clientpositive/perf/tez/query76.q.out @@ -73,132 +73,132 @@ Stage-0 limit:100 Stage-1 Reducer 6 vectorized - File Output Operator [FS_183] - Limit [LIM_182] (rows=100 width=408) + File Output Operator [FS_188] + Limit [LIM_187] (rows=100 width=408) Number of rows:100 - 
Select Operator [SEL_181] (rows=5600 width=408) + Select Operator [SEL_186] (rows=5600 width=408) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 5 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_180] - Group By Operator [GBY_179] (rows=5600 width=408) + SHUFFLE [RS_185] + Group By Operator [GBY_184] (rows=5600 width=408) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 <-Union 4 [SIMPLE_EDGE] <-Reducer 10 [CONTAINS] - Reduce Output Operator [RS_167] + Reduce Output Operator [RS_172] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_166] (rows=224000 width=408) + Group By Operator [GBY_171] (rows=224000 width=408) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["count()","sum(_col5)"],keys:_col0, _col1, _col2, _col3, _col4 - Top N Key Operator [TNK_165] (rows=26219002 width=388) + Top N Key Operator [TNK_170] (rows=26219002 width=388) keys:_col0, _col1, _col2, _col3, _col4,top n:100 - Select Operator [SEL_163] (rows=1433911 width=399) + Select Operator [SEL_168] (rows=1433911 width=399) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_162] (rows=1433911 width=209) - Conds:RS_45._col0=RS_195._col0(Inner),Output:["_col2","_col4","_col6","_col7"] + Merge Join Operator [MERGEJOIN_167] (rows=1433911 width=209) + Conds:RS_45._col0=RS_200._col0(Inner),Output:["_col2","_col4","_col6","_col7"] <-Map 16 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_195] + SHUFFLE [RS_200] PartitionCols:_col0 - Select Operator [SEL_194] (rows=73049 width=12) + Select Operator [SEL_199] (rows=73049 width=12) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_193] (rows=73049 width=12) + Filter Operator [FIL_198] (rows=73049 width=12) predicate:d_date_sk is not null TableScan [TS_39] (rows=73049 width=12) 
default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_45] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_148] (rows=1433911 width=205) - Conds:RS_192._col1=RS_172._col0(Inner),Output:["_col0","_col2","_col4"] + Merge Join Operator [MERGEJOIN_153] (rows=1433911 width=205) + Conds:RS_197._col1=RS_177._col0(Inner),Output:["_col0","_col2","_col4"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_172] + SHUFFLE [RS_177] PartitionCols:_col0 - Select Operator [SEL_169] (rows=462000 width=94) + Select Operator [SEL_174] (rows=462000 width=94) Output:["_col0","_col1"] - Filter Operator [FIL_168] (rows=462000 width=94) + Filter Operator [FIL_173] (rows=462000 width=94) predicate:i_item_sk is not null TableScan [TS_0] (rows=462000 width=94) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_category"] <-Map 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_192] + SHUFFLE [RS_197] PartitionCols:_col1 - Select Operator [SEL_191] (rows=1433911 width=119) + Select Operator [SEL_196] (rows=1433911 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_190] (rows=1433911 width=123) + Filter Operator [FIL_195] (rows=1433911 width=123) predicate:(cs_warehouse_sk is null and cs_sold_date_sk is not null and cs_item_sk is not null) TableScan [TS_33] (rows=287989836 width=123) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_warehouse_sk","cs_item_sk","cs_ext_sales_price"] <-Reducer 3 [CONTAINS] - Reduce Output Operator [RS_155] + Reduce Output Operator [RS_160] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_154] (rows=224000 width=408) + Group By Operator [GBY_159] (rows=224000 width=408) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["count()","sum(_col5)"],keys:_col0, _col1, _col2, _col3, _col4 - Top N Key Operator [TNK_153] (rows=26219002 width=388) + Top N Key Operator [TNK_158] 
(rows=26219002 width=388) keys:_col0, _col1, _col2, _col3, _col4,top n:100 - Select Operator [SEL_151] (rows=24749363 width=387) + Select Operator [SEL_156] (rows=24749363 width=387) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_150] (rows=24749363 width=204) - Conds:RS_12._col2=RS_178._col0(Inner),Output:["_col1","_col4","_col6","_col7"] + Merge Join Operator [MERGEJOIN_155] (rows=24749363 width=204) + Conds:RS_12._col2=RS_183._col0(Inner),Output:["_col1","_col4","_col6","_col7"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_178] + SHUFFLE [RS_183] PartitionCols:_col0 - Select Operator [SEL_177] (rows=73049 width=12) + Select Operator [SEL_182] (rows=73049 width=12) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_176] (rows=73049 width=12) + Filter Operator [FIL_181] (rows=73049 width=12) predicate:d_date_sk is not null TableScan [TS_6] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_12] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_144] (rows=24749363 width=200) - Conds:RS_170._col0=RS_175._col1(Inner),Output:["_col1","_col2","_col4"] + Merge Join Operator [MERGEJOIN_149] (rows=24749363 width=200) + Conds:RS_175._col0=RS_180._col1(Inner),Output:["_col1","_col2","_col4"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_170] + SHUFFLE [RS_175] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_169] + Please refer to the previous Select Operator [SEL_174] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_175] + SHUFFLE [RS_180] PartitionCols:_col1 - Select Operator [SEL_174] (rows=24749363 width=114) + Select Operator [SEL_179] (rows=24749363 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_173] (rows=24749363 width=118) + Filter Operator [FIL_178] (rows=24749363 width=118) predicate:(ss_addr_sk is null and ss_sold_date_sk is not null and ss_item_sk is not null) TableScan 
[TS_3] (rows=575995635 width=118) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_addr_sk","ss_ext_sales_price"] <-Reducer 8 [CONTAINS] - Reduce Output Operator [RS_161] + Reduce Output Operator [RS_166] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_160] (rows=224000 width=408) + Group By Operator [GBY_165] (rows=224000 width=408) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["count()","sum(_col5)"],keys:_col0, _col1, _col2, _col3, _col4 - Top N Key Operator [TNK_159] (rows=26219002 width=388) + Top N Key Operator [TNK_164] (rows=26219002 width=388) keys:_col0, _col1, _col2, _col3, _col4,top n:100 - Select Operator [SEL_157] (rows=35728 width=394) + Select Operator [SEL_162] (rows=35728 width=394) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Merge Join Operator [MERGEJOIN_156] (rows=35728 width=209) - Conds:RS_28._col0=RS_189._col0(Inner),Output:["_col2","_col4","_col6","_col7"] + Merge Join Operator [MERGEJOIN_161] (rows=35728 width=209) + Conds:RS_28._col0=RS_194._col0(Inner),Output:["_col2","_col4","_col6","_col7"] <-Map 14 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_189] + SHUFFLE [RS_194] PartitionCols:_col0 - Select Operator [SEL_188] (rows=73049 width=12) + Select Operator [SEL_193] (rows=73049 width=12) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_187] (rows=73049 width=12) + Filter Operator [FIL_192] (rows=73049 width=12) predicate:d_date_sk is not null TableScan [TS_22] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] <-Reducer 7 [SIMPLE_EDGE] SHUFFLE [RS_28] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_146] (rows=35728 width=205) - Conds:RS_186._col1=RS_171._col0(Inner),Output:["_col0","_col2","_col4"] + Merge Join Operator [MERGEJOIN_151] (rows=35728 width=205) + Conds:RS_191._col1=RS_176._col0(Inner),Output:["_col0","_col2","_col4"] <-Map 1 
[SIMPLE_EDGE] vectorized - SHUFFLE [RS_171] + SHUFFLE [RS_176] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_169] + Please refer to the previous Select Operator [SEL_174] <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_186] + SHUFFLE [RS_191] PartitionCols:_col1 - Select Operator [SEL_185] (rows=35728 width=119) + Select Operator [SEL_190] (rows=35728 width=119) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_184] (rows=35728 width=123) + Filter Operator [FIL_189] (rows=35728 width=123) predicate:(ws_web_page_sk is null and ws_sold_date_sk is not null and ws_item_sk is not null) TableScan [TS_16] (rows=144002668 width=123) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_web_page_sk","ws_ext_sales_price"] diff --git ql/src/test/results/clientpositive/perf/tez/query77.q.out ql/src/test/results/clientpositive/perf/tez/query77.q.out index ab2b3dc570..fcfc5a33bc 100644 --- ql/src/test/results/clientpositive/perf/tez/query77.q.out +++ ql/src/test/results/clientpositive/perf/tez/query77.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join MERGEJOIN[317][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 16' is a cross product +Warning: Shuffle Join MERGEJOIN[319][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 16' is a cross product PREHOOK: query: explain with ss as (select s_store_sk, @@ -267,280 +267,280 @@ Stage-0 limit:100 Stage-1 Reducer 8 vectorized - File Output Operator [FS_367] - Limit [LIM_366] (rows=100 width=439) + File Output Operator [FS_369] + Limit [LIM_368] (rows=100 width=439) Number of rows:100 - Select Operator [SEL_365] (rows=561 width=439) + Select Operator [SEL_367] (rows=561 width=439) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 7 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_364] - Select Operator [SEL_363] (rows=561 width=439) + SHUFFLE [RS_366] + Select Operator [SEL_365] (rows=561 width=439) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator 
[GBY_362] (rows=561 width=447) + Group By Operator [GBY_364] (rows=561 width=447) Output:["_col0","_col1","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Union 6 [SIMPLE_EDGE] <-Reducer 16 [CONTAINS] - Reduce Output Operator [RS_322] + Reduce Output Operator [RS_324] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_321] (rows=561 width=447) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Top N Key Operator [TNK_320] (rows=526 width=435) - keys:_col0, _col1, 0L,top n:100 - Select Operator [SEL_318] (rows=10 width=439) + Top N Key Operator [TNK_323] (rows=561 width=447) + keys:_col0, _col1,top n:100 + Group By Operator [GBY_322] (rows=561 width=447) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L + Select Operator [SEL_320] (rows=10 width=439) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_317] (rows=10 width=452) + Merge Join Operator [MERGEJOIN_319] (rows=10 width=452) Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 15 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_374] - Group By Operator [GBY_373] (rows=10 width=228) + PARTITION_ONLY_SHUFFLE [RS_376] + Group By Operator [GBY_375] (rows=10 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_55] PartitionCols:_col0 Group By Operator [GBY_54] (rows=2550 width=227) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","sum(_col3)"],keys:_col1 - Merge Join Operator [MERGEJOIN_302] (rows=286549727 width=227) - Conds:RS_372._col0=RS_334._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_304] (rows=286549727 width=227) + 
Conds:RS_374._col0=RS_336._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_334] + SHUFFLE [RS_336] PartitionCols:_col0 - Select Operator [SEL_330] (rows=8116 width=4) + Select Operator [SEL_332] (rows=8116 width=4) Output:["_col0"] - Filter Operator [FIL_329] (rows=8116 width=98) + Filter Operator [FIL_331] (rows=8116 width=98) predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-08-04 00:00:00' AND TIMESTAMP'1998-09-03 00:00:00' and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=98) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] <-Map 30 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_372] + SHUFFLE [RS_374] PartitionCols:_col0 - Select Operator [SEL_371] (rows=286549727 width=231) + Select Operator [SEL_373] (rows=286549727 width=231) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_370] (rows=286549727 width=231) + Filter Operator [FIL_372] (rows=286549727 width=231) predicate:(cs_sold_date_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_51_date_dim_d_date_sk_min) AND DynamicValue(RS_51_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_51_date_dim_d_date_sk_bloom_filter))) TableScan [TS_44] (rows=287989836 width=231) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_call_center_sk","cs_ext_sales_price","cs_net_profit"] <-Reducer 17 [BROADCAST_EDGE] vectorized - BROADCAST [RS_369] - Group By Operator [GBY_368] (rows=1 width=12) + BROADCAST [RS_371] + Group By Operator [GBY_370] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_344] - Group By Operator [GBY_341] (rows=1 width=12) + SHUFFLE [RS_346] + Group By Operator [GBY_343] (rows=1 width=12) 
Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_335] (rows=8116 width=4) + Select Operator [SEL_337] (rows=8116 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_330] + Please refer to the previous Select Operator [SEL_332] <-Reducer 19 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_379] - Group By Operator [GBY_378] (rows=1 width=224) + PARTITION_ONLY_SHUFFLE [RS_381] + Group By Operator [GBY_380] (rows=1 width=224) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"] <-Reducer 18 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_69] Group By Operator [GBY_68] (rows=1 width=224) Output:["_col0","_col1"],aggregations:["sum(_col1)","sum(_col2)"] - Merge Join Operator [MERGEJOIN_303] (rows=3199657 width=183) - Conds:RS_377._col0=RS_336._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_305] (rows=3199657 width=183) + Conds:RS_379._col0=RS_338._col0(Inner),Output:["_col1","_col2"] <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_336] + SHUFFLE [RS_338] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_330] + Please refer to the previous Select Operator [SEL_332] <-Map 31 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_377] + SHUFFLE [RS_379] PartitionCols:_col0 - Select Operator [SEL_376] (rows=28798881 width=223) + Select Operator [SEL_378] (rows=28798881 width=223) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_375] (rows=28798881 width=223) + Filter Operator [FIL_377] (rows=28798881 width=223) predicate:cr_returned_date_sk is not null TableScan [TS_58] (rows=28798881 width=223) default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_returned_date_sk","cr_return_amount","cr_net_loss"] <-Reducer 23 [CONTAINS] - Reduce Output Operator [RS_328] + Reduce Output Operator [RS_330] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_327] (rows=561 
width=447) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Top N Key Operator [TNK_326] (rows=526 width=435) - keys:_col0, _col1, 0L,top n:100 - Select Operator [SEL_324] (rows=392 width=435) + Top N Key Operator [TNK_329] (rows=561 width=447) + keys:_col0, _col1,top n:100 + Group By Operator [GBY_328] (rows=561 width=447) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L + Select Operator [SEL_326] (rows=392 width=435) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_323] (rows=392 width=335) - Conds:RS_390._col0=RS_395._col0(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5"] + Merge Join Operator [MERGEJOIN_325] (rows=392 width=335) + Conds:RS_392._col0=RS_397._col0(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5"] <-Reducer 22 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_390] + SHUFFLE [RS_392] PartitionCols:_col0 - Group By Operator [GBY_389] (rows=205 width=228) + Group By Operator [GBY_391] (rows=205 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 <-Reducer 21 [SIMPLE_EDGE] SHUFFLE [RS_94] PartitionCols:_col0 Group By Operator [GBY_93] (rows=26445 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","sum(_col3)"],keys:_col5 - Merge Join Operator [MERGEJOIN_305] (rows=143931136 width=227) - Conds:RS_89._col1=RS_387._col0(Inner),Output:["_col2","_col3","_col5"] + Merge Join Operator [MERGEJOIN_307] (rows=143931136 width=227) + Conds:RS_89._col1=RS_389._col0(Inner),Output:["_col2","_col3","_col5"] <-Map 33 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_387] + SHUFFLE [RS_389] PartitionCols:_col0 - Select Operator [SEL_386] (rows=4602 width=4) + Select Operator [SEL_388] (rows=4602 width=4) Output:["_col0"] - Filter Operator [FIL_385] (rows=4602 width=4) + Filter Operator 
[FIL_387] (rows=4602 width=4) predicate:wp_web_page_sk is not null TableScan [TS_83] (rows=4602 width=4) default@web_page,web_page,Tbl:COMPLETE,Col:COMPLETE,Output:["wp_web_page_sk"] <-Reducer 20 [SIMPLE_EDGE] SHUFFLE [RS_89] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_304] (rows=143931136 width=227) - Conds:RS_384._col0=RS_337._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_306] (rows=143931136 width=227) + Conds:RS_386._col0=RS_339._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_337] + SHUFFLE [RS_339] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_330] + Please refer to the previous Select Operator [SEL_332] <-Map 32 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_384] + SHUFFLE [RS_386] PartitionCols:_col0 - Select Operator [SEL_383] (rows=143931136 width=231) + Select Operator [SEL_385] (rows=143931136 width=231) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_382] (rows=143931136 width=231) + Filter Operator [FIL_384] (rows=143931136 width=231) predicate:(ws_sold_date_sk is not null and ws_web_page_sk is not null and ws_sold_date_sk BETWEEN DynamicValue(RS_87_date_dim_d_date_sk_min) AND DynamicValue(RS_87_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_87_date_dim_d_date_sk_bloom_filter))) TableScan [TS_77] (rows=144002668 width=231) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_web_page_sk","ws_ext_sales_price","ws_net_profit"] <-Reducer 24 [BROADCAST_EDGE] vectorized - BROADCAST [RS_381] - Group By Operator [GBY_380] (rows=1 width=12) + BROADCAST [RS_383] + Group By Operator [GBY_382] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_345] - Group By Operator [GBY_342] (rows=1 width=12) + SHUFFLE [RS_347] + Group By 
Operator [GBY_344] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_338] (rows=8116 width=4) + Select Operator [SEL_340] (rows=8116 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_330] + Please refer to the previous Select Operator [SEL_332] <-Reducer 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_395] + SHUFFLE [RS_397] PartitionCols:_col0 - Group By Operator [GBY_394] (rows=187 width=228) + Group By Operator [GBY_396] (rows=187 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 <-Reducer 26 [SIMPLE_EDGE] SHUFFLE [RS_114] PartitionCols:_col0 Group By Operator [GBY_113] (rows=2244 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","sum(_col3)"],keys:_col5 - Merge Join Operator [MERGEJOIN_307] (rows=13129719 width=217) - Conds:RS_109._col1=RS_388._col0(Inner),Output:["_col2","_col3","_col5"] + Merge Join Operator [MERGEJOIN_309] (rows=13129719 width=217) + Conds:RS_109._col1=RS_390._col0(Inner),Output:["_col2","_col3","_col5"] <-Map 33 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_388] + SHUFFLE [RS_390] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_386] + Please refer to the previous Select Operator [SEL_388] <-Reducer 25 [SIMPLE_EDGE] SHUFFLE [RS_109] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_306] (rows=13129719 width=217) - Conds:RS_393._col0=RS_339._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_308] (rows=13129719 width=217) + Conds:RS_395._col0=RS_341._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_339] + SHUFFLE [RS_341] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_330] + Please refer to the previous Select Operator [SEL_332] <-Map 34 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_393] + SHUFFLE [RS_395] 
PartitionCols:_col0 - Select Operator [SEL_392] (rows=13129719 width=221) + Select Operator [SEL_394] (rows=13129719 width=221) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_391] (rows=13129719 width=221) + Filter Operator [FIL_393] (rows=13129719 width=221) predicate:(wr_web_page_sk is not null and wr_returned_date_sk is not null) TableScan [TS_97] (rows=14398467 width=221) default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_returned_date_sk","wr_web_page_sk","wr_return_amt","wr_net_loss"] <-Reducer 5 [CONTAINS] - Reduce Output Operator [RS_316] + Reduce Output Operator [RS_318] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_315] (rows=561 width=447) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Top N Key Operator [TNK_314] (rows=526 width=435) - keys:_col0, _col1, 0L,top n:100 - Select Operator [SEL_312] (rows=124 width=437) + Top N Key Operator [TNK_317] (rows=561 width=447) + keys:_col0, _col1,top n:100 + Group By Operator [GBY_316] (rows=561 width=447) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L + Select Operator [SEL_314] (rows=124 width=437) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_311] (rows=124 width=379) - Conds:RS_356._col0=RS_361._col0(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5"] + Merge Join Operator [MERGEJOIN_313] (rows=124 width=379) + Conds:RS_358._col0=RS_363._col0(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5"] <-Reducer 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_361] + SHUFFLE [RS_363] PartitionCols:_col0 - Group By Operator [GBY_360] (rows=84 width=228) + Group By Operator [GBY_362] (rows=84 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_37] 
PartitionCols:_col0 Group By Operator [GBY_36] (rows=3948 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","sum(_col3)"],keys:_col5 - Merge Join Operator [MERGEJOIN_301] (rows=53634860 width=220) - Conds:RS_32._col1=RS_354._col0(Inner),Output:["_col2","_col3","_col5"] + Merge Join Operator [MERGEJOIN_303] (rows=53634860 width=220) + Conds:RS_32._col1=RS_356._col0(Inner),Output:["_col2","_col3","_col5"] <-Map 28 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_354] + SHUFFLE [RS_356] PartitionCols:_col0 - Select Operator [SEL_352] (rows=1704 width=4) + Select Operator [SEL_354] (rows=1704 width=4) Output:["_col0"] - Filter Operator [FIL_351] (rows=1704 width=4) + Filter Operator [FIL_353] (rows=1704 width=4) predicate:s_store_sk is not null TableScan [TS_6] (rows=1704 width=4) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk"] <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_32] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_300] (rows=53634860 width=220) - Conds:RS_359._col0=RS_333._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_302] (rows=53634860 width=220) + Conds:RS_361._col0=RS_335._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_333] + SHUFFLE [RS_335] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_330] + Please refer to the previous Select Operator [SEL_332] <-Map 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_359] + SHUFFLE [RS_361] PartitionCols:_col0 - Select Operator [SEL_358] (rows=53634860 width=223) + Select Operator [SEL_360] (rows=53634860 width=223) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_357] (rows=53634860 width=223) + Filter Operator [FIL_359] (rows=53634860 width=223) predicate:(sr_store_sk is not null and sr_returned_date_sk is not null) TableScan [TS_20] (rows=57591150 width=223) 
default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_store_sk","sr_return_amt","sr_net_loss"] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_356] + SHUFFLE [RS_358] PartitionCols:_col0 - Group By Operator [GBY_355] (rows=83 width=228) + Group By Operator [GBY_357] (rows=83 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_17] PartitionCols:_col0 Group By Operator [GBY_16] (rows=37184 width=228) Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","sum(_col3)"],keys:_col5 - Merge Join Operator [MERGEJOIN_299] (rows=525329897 width=217) - Conds:RS_12._col1=RS_353._col0(Inner),Output:["_col2","_col3","_col5"] + Merge Join Operator [MERGEJOIN_301] (rows=525329897 width=217) + Conds:RS_12._col1=RS_355._col0(Inner),Output:["_col2","_col3","_col5"] <-Map 28 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_353] + SHUFFLE [RS_355] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_352] + Please refer to the previous Select Operator [SEL_354] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_12] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_298] (rows=525329897 width=217) - Conds:RS_350._col0=RS_331._col0(Inner),Output:["_col1","_col2","_col3"] + Merge Join Operator [MERGEJOIN_300] (rows=525329897 width=217) + Conds:RS_352._col0=RS_333._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_331] + SHUFFLE [RS_333] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_330] + Please refer to the previous Select Operator [SEL_332] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_350] + SHUFFLE [RS_352] PartitionCols:_col0 - Select Operator [SEL_349] (rows=525329897 width=221) + Select Operator [SEL_351] (rows=525329897 width=221) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_348] (rows=525329897 width=221) + Filter Operator [FIL_350] (rows=525329897 
width=221) predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_10_date_dim_d_date_sk_min) AND DynamicValue(RS_10_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_10_date_dim_d_date_sk_bloom_filter))) TableScan [TS_0] (rows=575995635 width=221) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_ext_sales_price","ss_net_profit"] <-Reducer 10 [BROADCAST_EDGE] vectorized - BROADCAST [RS_347] - Group By Operator [GBY_346] (rows=1 width=12) + BROADCAST [RS_349] + Group By Operator [GBY_348] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 9 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_343] - Group By Operator [GBY_340] (rows=1 width=12) + SHUFFLE [RS_345] + Group By Operator [GBY_342] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_332] (rows=8116 width=4) + Select Operator [SEL_334] (rows=8116 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_330] + Please refer to the previous Select Operator [SEL_332] diff --git ql/src/test/results/clientpositive/perf/tez/query8.q.out ql/src/test/results/clientpositive/perf/tez/query8.q.out index 0af8fdf3df..c72498b107 100644 --- ql/src/test/results/clientpositive/perf/tez/query8.q.out +++ ql/src/test/results/clientpositive/perf/tez/query8.q.out @@ -245,141 +245,141 @@ Stage-0 limit:100 Stage-1 Reducer 5 vectorized - File Output Operator [FS_149] - Limit [LIM_148] (rows=1 width=200) + File Output Operator [FS_154] + Limit [LIM_153] (rows=1 width=200) Number of rows:100 - Select Operator [SEL_147] (rows=1 width=200) + Select Operator [SEL_152] (rows=1 width=200) Output:["_col0","_col1"] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_146] - 
Group By Operator [GBY_145] (rows=1 width=200) + SHUFFLE [RS_151] + Group By Operator [GBY_150] (rows=1 width=200) Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_57] PartitionCols:_col0 Group By Operator [GBY_56] (rows=401 width=200) Output:["_col0","_col1"],aggregations:["sum(_col2)"],keys:_col6 - Top N Key Operator [TNK_84] (rows=525329897 width=194) + Top N Key Operator [TNK_87] (rows=525329897 width=194) keys:_col6,top n:100 - Merge Join Operator [MERGEJOIN_118] (rows=525329897 width=194) + Merge Join Operator [MERGEJOIN_123] (rows=525329897 width=194) Conds:RS_52._col1=RS_53._col1(Inner),Output:["_col2","_col6"] <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_53] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_117] (rows=1 width=92) - Conds:RS_141._col0=RS_144._col2(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_122] (rows=1 width=92) + Conds:RS_146._col0=RS_149._col2(Inner),Output:["_col1","_col2"] <-Map 18 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_144] + SHUFFLE [RS_149] PartitionCols:_col2 - Select Operator [SEL_143] (rows=1704 width=276) + Select Operator [SEL_148] (rows=1704 width=276) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_142] (rows=1704 width=181) + Filter Operator [FIL_147] (rows=1704 width=181) predicate:(s_store_sk is not null and substr(s_zip, 1, 2) is not null) TableScan [TS_42] (rows=1704 width=181) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_name","s_zip"] <-Reducer 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_141] + SHUFFLE [RS_146] PartitionCols:_col0 - Select Operator [SEL_140] (rows=1 width=184) + Select Operator [SEL_145] (rows=1 width=184) Output:["_col0"] - Filter Operator [FIL_139] (rows=1 width=192) + Filter Operator [FIL_144] (rows=1 width=192) predicate:(_col1 = 2L) - Group By Operator [GBY_138] (rows=5633 width=192) + Group By Operator [GBY_143] (rows=5633 width=192) 
Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Union 10 [SIMPLE_EDGE] <-Reducer 16 [CONTAINS] vectorized - Reduce Output Operator [RS_170] + Reduce Output Operator [RS_175] PartitionCols:_col0 - Group By Operator [GBY_169] (rows=5633 width=192) + Group By Operator [GBY_174] (rows=5633 width=192) Output:["_col0","_col1"],aggregations:["count(_col1)"],keys:_col0 - Group By Operator [GBY_168] (rows=1126 width=192) + Group By Operator [GBY_173] (rows=1126 width=192) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Reducer 15 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_167] + SHUFFLE [RS_172] PartitionCols:_col0 - Group By Operator [GBY_166] (rows=1126 width=192) + Group By Operator [GBY_171] (rows=1126 width=192) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Select Operator [SEL_165] (rows=2253 width=97) + Select Operator [SEL_170] (rows=2253 width=97) Output:["_col0"] - Filter Operator [FIL_164] (rows=2253 width=97) + Filter Operator [FIL_169] (rows=2253 width=97) predicate:(_col1 > 10L) - Group By Operator [GBY_163] (rows=6761 width=97) + Group By Operator [GBY_168] (rows=6761 width=97) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_25] PartitionCols:_col0 Group By Operator [GBY_24] (rows=67610 width=97) Output:["_col0","_col1"],aggregations:["count()"],keys:_col1 - Merge Join Operator [MERGEJOIN_116] (rows=26666667 width=89) - Conds:RS_159._col0=RS_162._col0(Inner),Output:["_col1"] + Merge Join Operator [MERGEJOIN_121] (rows=26666667 width=89) + Conds:RS_164._col0=RS_167._col0(Inner),Output:["_col1"] <-Map 13 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_159] + SHUFFLE [RS_164] PartitionCols:_col0 - Select Operator [SEL_158] (rows=40000000 width=93) + Select Operator [SEL_163] (rows=40000000 width=93) Output:["_col0","_col1"] - Filter Operator [FIL_157] (rows=40000000 width=93) + Filter Operator [FIL_162] (rows=40000000 width=93) 
predicate:(ca_address_sk is not null and substr(substr(ca_zip, 1, 5), 1, 2) is not null) TableScan [TS_14] (rows=40000000 width=93) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_zip"] <-Map 17 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_162] + SHUFFLE [RS_167] PartitionCols:_col0 - Select Operator [SEL_161] (rows=26666667 width=4) + Select Operator [SEL_166] (rows=26666667 width=4) Output:["_col0"] - Filter Operator [FIL_160] (rows=26666667 width=89) + Filter Operator [FIL_165] (rows=26666667 width=89) predicate:((c_preferred_cust_flag = 'Y') and c_current_addr_sk is not null) TableScan [TS_17] (rows=80000000 width=89) default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_current_addr_sk","c_preferred_cust_flag"] <-Reducer 9 [CONTAINS] vectorized - Reduce Output Operator [RS_156] + Reduce Output Operator [RS_161] PartitionCols:_col0 - Group By Operator [GBY_155] (rows=5633 width=192) + Group By Operator [GBY_160] (rows=5633 width=192) Output:["_col0","_col1"],aggregations:["count(_col1)"],keys:_col0 - Group By Operator [GBY_154] (rows=10141 width=192) + Group By Operator [GBY_159] (rows=10141 width=192) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_153] + SHUFFLE [RS_158] PartitionCols:_col0 - Group By Operator [GBY_152] (rows=141974 width=192) + Group By Operator [GBY_157] (rows=141974 width=192) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Select Operator [SEL_151] (rows=20000000 width=89) + Select Operator [SEL_156] (rows=20000000 width=89) Output:["_col0"] - Filter Operator [FIL_150] (rows=20000000 width=89) + Filter Operator [FIL_155] (rows=20000000 width=89) predicate:((substr(ca_zip, 1, 5)) IN ('89436', '30868', '65085', '22977', '83927', '77557', '58429', '40697', '80614', '10502', '32779', '91137', '61265', '98294', '17921', '18427', '21203', '59362', '87291', '84093', '21505', '17184', '10866', '67898', 
'25797', '28055', '18377', '80332', '74535', '21757', '29742', '90885', '29898', '17819', '40811', '25990', '47513', '89531', '91068', '10391', '18846', '99223', '82637', '41368', '83658', '86199', '81625', '26696', '89338', '88425', '32200', '81427', '19053', '77471', '36610', '99823', '43276', '41249', '48584', '83550', '82276', '18842', '78890', '14090', '38123', '40936', '34425', '19850', '43286', '80072', '79188', '54191', '11395', '50497', '84861', '90733', '21068', '57666', '37119', '25004', '57835', '70067', '62878', '95806', '19303', '18840', '19124', '29785', '16737', '16022', '49613', '89977', '68310', '60069', '98360', '48649', '39050', '41793', '25002', '27413', '39736', '47208', '16515', '94808', '57648', '15009', '80015', '42961', '63982', '21744', '71853', '81087', '67468', '34175', '64008', '20261', '11201', '51799', '48043', '45645', '61163', '48375', '36447', '57042', '21218', '41100', '89951', '22745', '35851', '83326', '61125', '78298', '80752', '49858', '52940', '96976', '63792', '11376', '53582', '18717', '90226', '50530', '94203', '99447', '27670', '96577', '57856', '56372', '16165', '23427', '54561', '28806', '44439', '22926', '30123', '61451', '92397', '56979', '92309', '70873', '13355', '21801', '46346', '37562', '56458', '28286', '47306', '99555', '69399', '26234', '47546', '49661', '88601', '35943', '39936', '25632', '24611', '44166', '56648', '30379', '59785', '11110', '14329', '93815', '52226', '71381', '13842', '25612', '63294', '14664', '21077', '82626', '18799', '60915', '81020', '56447', '76619', '11433', '13414', '42548', '92713', '70467', '30884', '47484', '16072', '38936', '13036', '88376', '45539', '35901', '19506', '65690', '73957', '71850', '49231', '14276', '20005', '18384', '76615', '11635', '38177', '55607', '41369', '95447', '58581', '58149', '91946', '33790', '76232', '75692', '95464', '22246', '51061', '56692', '53121', '77209', '15482', '10688', '14868', '45907', '73520', '72666', '25734', '17959', '24677', '66446', 
'94627', '53535', '15560', '41967', '69297', '11929', '59403', '33283', '52232', '57350', '43933', '40921', '36635', '10827', '71286', '19736', '80619', '25251', '95042', '15526', '36496', '55854', '49124', '81980', '35375', '49157', '63512', '28944', '14946', '36503', '54010', '18767', '23969', '43905', '66979', '33113', '21286', '58471', '59080', '13395', '79144', '70373', '67031', '38360', '26705', '50906', '52406', '26066', '73146', '15884', '31897', '30045', '61068', '45550', '92454', '13376', '14354', '19770', '22928', '97790', '50723', '46081', '30202', '14410', '20223', '88500', '67298', '13261', '14172', '81410', '93578', '83583', '46047', '94167', '82564', '21156', '15799', '86709', '37931', '74703', '83103', '23054', '70470', '72008', '49247', '91911', '69998', '20961', '70070', '63197', '54853', '88191', '91830', '49521', '19454', '81450', '89091', '62378', '25683', '61869', '51744', '36580', '85778', '36871', '48121', '28810', '83712', '45486', '67393', '26935', '42393', '20132', '55349', '86057', '21309', '80218', '10094', '11357', '48819', '39734', '40758', '30432', '21204', '29467', '30214', '61024', '55307', '74621', '11622', '68908', '33032', '52868', '99194', '99900', '84936', '69036', '99149', '45013', '32895', '59004', '32322', '14933', '32936', '33562', '72550', '27385', '58049', '58200', '16808', '21360', '32961', '18586', '79307', '15492') and substr(substr(ca_zip, 1, 5), 1, 2) is not null) TableScan [TS_6] (rows=40000000 width=89) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_zip"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_52] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_115] (rows=525329897 width=110) - Conds:RS_137._col0=RS_129._col0(Inner),Output:["_col1","_col2"] + Merge Join Operator [MERGEJOIN_120] (rows=525329897 width=110) + Conds:RS_142._col0=RS_134._col0(Inner),Output:["_col1","_col2"] <-Map 6 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_129] + PARTITION_ONLY_SHUFFLE [RS_134] 
PartitionCols:_col0 - Select Operator [SEL_128] (rows=130 width=4) + Select Operator [SEL_133] (rows=130 width=4) Output:["_col0"] - Filter Operator [FIL_127] (rows=130 width=12) + Filter Operator [FIL_132] (rows=130 width=12) predicate:((d_year = 2002) and (d_qoy = 1) and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=12) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_qoy"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_137] + SHUFFLE [RS_142] PartitionCols:_col0 - Select Operator [SEL_136] (rows=525329897 width=114) + Select Operator [SEL_141] (rows=525329897 width=114) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_135] (rows=525329897 width=114) + Filter Operator [FIL_140] (rows=525329897 width=114) predicate:(ss_sold_date_sk is not null and ss_store_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_50_date_dim_d_date_sk_min) AND DynamicValue(RS_50_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_50_date_dim_d_date_sk_bloom_filter))) TableScan [TS_0] (rows=575995635 width=114) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_store_sk","ss_net_profit"] <-Reducer 7 [BROADCAST_EDGE] vectorized - BROADCAST [RS_134] - Group By Operator [GBY_133] (rows=1 width=12) + BROADCAST [RS_139] + Group By Operator [GBY_138] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_132] - Group By Operator [GBY_131] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_137] + Group By Operator [GBY_136] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_130] (rows=130 width=4) + Select Operator [SEL_135] (rows=130 width=4) Output:["_col0"] - Please refer to the previous Select 
Operator [SEL_128] + Please refer to the previous Select Operator [SEL_133] diff --git ql/src/test/results/clientpositive/perf/tez/query80.q.out ql/src/test/results/clientpositive/perf/tez/query80.q.out index 47844158fa..3020b58781 100644 --- ql/src/test/results/clientpositive/perf/tez/query80.q.out +++ ql/src/test/results/clientpositive/perf/tez/query80.q.out @@ -249,28 +249,28 @@ Stage-0 limit:100 Stage-1 Reducer 10 vectorized - File Output Operator [FS_438] - Limit [LIM_437] (rows=100 width=619) + File Output Operator [FS_440] + Limit [LIM_439] (rows=100 width=619) Number of rows:100 - Select Operator [SEL_436] (rows=59581 width=619) + Select Operator [SEL_438] (rows=59581 width=619) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_435] - Select Operator [SEL_434] (rows=59581 width=619) + SHUFFLE [RS_437] + Select Operator [SEL_436] (rows=59581 width=619) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_433] (rows=59581 width=627) + Group By Operator [GBY_435] (rows=59581 width=627) Output:["_col0","_col1","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Union 8 [SIMPLE_EDGE] <-Reducer 18 [CONTAINS] vectorized - Reduce Output Operator [RS_454] + Reduce Output Operator [RS_456] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_453] (rows=59581 width=627) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Top N Key Operator [TNK_452] (rows=39721 width=618) - keys:_col0, _col1, 0L,top n:100 - Select Operator [SEL_451] (rows=38846 width=619) + Top N Key Operator [TNK_455] (rows=59581 width=627) + keys:_col0, _col1,top n:100 + Group By Operator [GBY_454] (rows=59581 width=627) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L + 
Select Operator [SEL_453] (rows=38846 width=619) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_450] (rows=38846 width=436) + Group By Operator [GBY_452] (rows=38846 width=436) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0 <-Reducer 17 [SIMPLE_EDGE] SHUFFLE [RS_75] @@ -279,103 +279,103 @@ Stage-0 Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col1)","sum(_col2)","sum(_col3)"],keys:_col0 Select Operator [SEL_72] (rows=154681759 width=322) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_369] (rows=154681759 width=322) - Conds:RS_69._col1=RS_449._col0(Inner),Output:["_col5","_col6","_col9","_col10","_col15"] + Merge Join Operator [MERGEJOIN_371] (rows=154681759 width=322) + Conds:RS_69._col1=RS_451._col0(Inner),Output:["_col5","_col6","_col9","_col10","_col15"] <-Map 32 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_449] + SHUFFLE [RS_451] PartitionCols:_col0 - Select Operator [SEL_448] (rows=46000 width=104) + Select Operator [SEL_450] (rows=46000 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_447] (rows=46000 width=104) + Filter Operator [FIL_449] (rows=46000 width=104) predicate:cp_catalog_page_sk is not null TableScan [TS_54] (rows=46000 width=104) default@catalog_page,catalog_page,Tbl:COMPLETE,Col:COMPLETE,Output:["cp_catalog_page_sk","cp_catalog_page_id"] <-Reducer 16 [SIMPLE_EDGE] SHUFFLE [RS_69] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_368] (rows=154681759 width=226) - Conds:RS_66._col3=RS_423._col0(Inner),Output:["_col1","_col5","_col6","_col9","_col10"] + Merge Join Operator [MERGEJOIN_370] (rows=154681759 width=226) + Conds:RS_66._col3=RS_425._col0(Inner),Output:["_col1","_col5","_col6","_col9","_col10"] <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_423] + SHUFFLE [RS_425] PartitionCols:_col0 - Select Operator [SEL_421] (rows=1150 width=4) + Select Operator [SEL_423] (rows=1150 width=4) 
Output:["_col0"] - Filter Operator [FIL_420] (rows=1150 width=89) + Filter Operator [FIL_422] (rows=1150 width=89) predicate:((p_channel_tv = 'N') and p_promo_sk is not null) TableScan [TS_12] (rows=2300 width=89) default@promotion,promotion,Tbl:COMPLETE,Col:COMPLETE,Output:["p_promo_sk","p_channel_tv"] <-Reducer 15 [SIMPLE_EDGE] SHUFFLE [RS_66] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_367] (rows=154681759 width=230) - Conds:RS_63._col2=RS_418._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col9","_col10"] + Merge Join Operator [MERGEJOIN_369] (rows=154681759 width=230) + Conds:RS_63._col2=RS_420._col0(Inner),Output:["_col1","_col3","_col5","_col6","_col9","_col10"] <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_418] + SHUFFLE [RS_420] PartitionCols:_col0 - Select Operator [SEL_416] (rows=154000 width=4) + Select Operator [SEL_418] (rows=154000 width=4) Output:["_col0"] - Filter Operator [FIL_415] (rows=154000 width=115) + Filter Operator [FIL_417] (rows=154000 width=115) predicate:((i_current_price > 50) and i_item_sk is not null) TableScan [TS_9] (rows=462000 width=115) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_current_price"] <-Reducer 14 [SIMPLE_EDGE] SHUFFLE [RS_63] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_366] (rows=464045263 width=322) - Conds:RS_60._col0=RS_397._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col9","_col10"] + Merge Join Operator [MERGEJOIN_368] (rows=464045263 width=322) + Conds:RS_60._col0=RS_399._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col9","_col10"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_397] + SHUFFLE [RS_399] PartitionCols:_col0 - Select Operator [SEL_394] (rows=8116 width=4) + Select Operator [SEL_396] (rows=8116 width=4) Output:["_col0"] - Filter Operator [FIL_393] (rows=8116 width=98) + Filter Operator [FIL_395] (rows=8116 width=98) predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-08-04 00:00:00' AND 
TIMESTAMP'1998-09-03 00:00:00' and d_date_sk is not null) TableScan [TS_6] (rows=73049 width=98) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] <-Reducer 30 [SIMPLE_EDGE] SHUFFLE [RS_60] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_365] (rows=464045263 width=326) - Conds:RS_443._col2, _col4=RS_446._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col9","_col10"] + Merge Join Operator [MERGEJOIN_367] (rows=464045263 width=326) + Conds:RS_445._col2, _col4=RS_448._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col9","_col10"] <-Map 29 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_443] + SHUFFLE [RS_445] PartitionCols:_col2, _col4 - Select Operator [SEL_442] (rows=283691906 width=243) + Select Operator [SEL_444] (rows=283691906 width=243) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_441] (rows=283691906 width=243) + Filter Operator [FIL_443] (rows=283691906 width=243) predicate:(cs_promo_sk is not null and cs_sold_date_sk is not null and cs_catalog_page_sk is not null and cs_item_sk is not null and cs_sold_date_sk BETWEEN DynamicValue(RS_61_date_dim_d_date_sk_min) AND DynamicValue(RS_61_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_61_date_dim_d_date_sk_bloom_filter))) TableScan [TS_39] (rows=287989836 width=243) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_catalog_page_sk","cs_item_sk","cs_promo_sk","cs_order_number","cs_ext_sales_price","cs_net_profit"] <-Reducer 19 [BROADCAST_EDGE] vectorized - BROADCAST [RS_440] - Group By Operator [GBY_439] (rows=1 width=12) + BROADCAST [RS_442] + Group By Operator [GBY_441] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_405] - Group By Operator 
[GBY_402] (rows=1 width=12) + SHUFFLE [RS_407] + Group By Operator [GBY_404] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_398] (rows=8116 width=4) + Select Operator [SEL_400] (rows=8116 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_394] + Please refer to the previous Select Operator [SEL_396] <-Map 31 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_446] + SHUFFLE [RS_448] PartitionCols:_col0, _col1 - Select Operator [SEL_445] (rows=28798881 width=227) + Select Operator [SEL_447] (rows=28798881 width=227) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_444] (rows=28798881 width=227) + Filter Operator [FIL_446] (rows=28798881 width=227) predicate:(cr_item_sk is not null and cr_order_number is not null) TableScan [TS_42] (rows=28798881 width=227) default@catalog_returns,catalog_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["cr_item_sk","cr_order_number","cr_return_amount","cr_net_loss"] <-Reducer 24 [CONTAINS] vectorized - Reduce Output Operator [RS_470] + Reduce Output Operator [RS_472] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_469] (rows=59581 width=627) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Top N Key Operator [TNK_468] (rows=39721 width=618) - keys:_col0, _col1, 0L,top n:100 - Select Operator [SEL_467] (rows=53 width=615) + Top N Key Operator [TNK_471] (rows=59581 width=627) + keys:_col0, _col1,top n:100 + Group By Operator [GBY_470] (rows=59581 width=627) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L + Select Operator [SEL_469] (rows=53 width=615) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_466] (rows=53 width=436) + Group By Operator [GBY_468] (rows=53 width=436) 
Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0 <-Reducer 23 [SIMPLE_EDGE] SHUFFLE [RS_115] @@ -384,88 +384,88 @@ Stage-0 Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col1)","sum(_col2)","sum(_col3)"],keys:_col0 Select Operator [SEL_112] (rows=84869669 width=323) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_374] (rows=84869669 width=323) - Conds:RS_109._col2=RS_465._col0(Inner),Output:["_col5","_col6","_col9","_col10","_col15"] + Merge Join Operator [MERGEJOIN_376] (rows=84869669 width=323) + Conds:RS_109._col2=RS_467._col0(Inner),Output:["_col5","_col6","_col9","_col10","_col15"] <-Map 36 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_465] + SHUFFLE [RS_467] PartitionCols:_col0 - Select Operator [SEL_464] (rows=84 width=104) + Select Operator [SEL_466] (rows=84 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_463] (rows=84 width=104) + Filter Operator [FIL_465] (rows=84 width=104) predicate:web_site_sk is not null TableScan [TS_94] (rows=84 width=104) default@web_site,web_site,Tbl:COMPLETE,Col:COMPLETE,Output:["web_site_sk","web_site_id"] <-Reducer 22 [SIMPLE_EDGE] SHUFFLE [RS_109] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_373] (rows=84869669 width=227) - Conds:RS_106._col3=RS_424._col0(Inner),Output:["_col2","_col5","_col6","_col9","_col10"] + Merge Join Operator [MERGEJOIN_375] (rows=84869669 width=227) + Conds:RS_106._col3=RS_426._col0(Inner),Output:["_col2","_col5","_col6","_col9","_col10"] <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_424] + SHUFFLE [RS_426] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_421] + Please refer to the previous Select Operator [SEL_423] <-Reducer 21 [SIMPLE_EDGE] SHUFFLE [RS_106] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_372] (rows=84869669 width=231) - Conds:RS_103._col1=RS_419._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col9","_col10"] + Merge 
Join Operator [MERGEJOIN_374] (rows=84869669 width=231) + Conds:RS_103._col1=RS_421._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col9","_col10"] <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_419] + SHUFFLE [RS_421] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_416] + Please refer to the previous Select Operator [SEL_418] <-Reducer 20 [SIMPLE_EDGE] SHUFFLE [RS_103] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_371] (rows=254608997 width=359) - Conds:RS_100._col0=RS_399._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col9","_col10"] + Merge Join Operator [MERGEJOIN_373] (rows=254608997 width=359) + Conds:RS_100._col0=RS_401._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col9","_col10"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_399] + SHUFFLE [RS_401] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_394] + Please refer to the previous Select Operator [SEL_396] <-Reducer 34 [SIMPLE_EDGE] SHUFFLE [RS_100] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_370] (rows=254608997 width=363) - Conds:RS_459._col1, _col4=RS_462._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col9","_col10"] + Merge Join Operator [MERGEJOIN_372] (rows=254608997 width=363) + Conds:RS_461._col1, _col4=RS_464._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col9","_col10"] <-Map 33 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_459] + SHUFFLE [RS_461] PartitionCols:_col1, _col4 - Select Operator [SEL_458] (rows=143894769 width=243) + Select Operator [SEL_460] (rows=143894769 width=243) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_457] (rows=143894769 width=243) + Filter Operator [FIL_459] (rows=143894769 width=243) predicate:(ws_promo_sk is not null and ws_web_site_sk is not null and ws_sold_date_sk is not null and ws_item_sk is not null and ws_sold_date_sk BETWEEN 
DynamicValue(RS_101_date_dim_d_date_sk_min) AND DynamicValue(RS_101_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_101_date_dim_d_date_sk_bloom_filter))) TableScan [TS_79] (rows=144002668 width=243) default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_item_sk","ws_web_site_sk","ws_promo_sk","ws_order_number","ws_ext_sales_price","ws_net_profit"] <-Reducer 25 [BROADCAST_EDGE] vectorized - BROADCAST [RS_456] - Group By Operator [GBY_455] (rows=1 width=12) + BROADCAST [RS_458] + Group By Operator [GBY_457] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_406] - Group By Operator [GBY_403] (rows=1 width=12) + SHUFFLE [RS_408] + Group By Operator [GBY_405] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_400] (rows=8116 width=4) + Select Operator [SEL_402] (rows=8116 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_394] + Please refer to the previous Select Operator [SEL_396] <-Map 35 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_462] + SHUFFLE [RS_464] PartitionCols:_col0, _col1 - Select Operator [SEL_461] (rows=14398467 width=221) + Select Operator [SEL_463] (rows=14398467 width=221) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_460] (rows=14398467 width=221) + Filter Operator [FIL_462] (rows=14398467 width=221) predicate:(wr_item_sk is not null and wr_order_number is not null) TableScan [TS_82] (rows=14398467 width=221) default@web_returns,web_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["wr_item_sk","wr_order_number","wr_return_amt","wr_net_loss"] <-Reducer 7 [CONTAINS] vectorized - Reduce Output Operator [RS_432] + Reduce Output Operator [RS_434] PartitionCols:_col0, _col1, _col2 - Group By 
Operator [GBY_431] (rows=59581 width=627) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L - Top N Key Operator [TNK_430] (rows=39721 width=618) - keys:_col0, _col1, 0L,top n:100 - Select Operator [SEL_429] (rows=822 width=617) + Top N Key Operator [TNK_433] (rows=59581 width=627) + keys:_col0, _col1,top n:100 + Group By Operator [GBY_432] (rows=59581 width=627) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col2)","sum(_col3)","sum(_col4)"],keys:_col0, _col1, 0L + Select Operator [SEL_431] (rows=822 width=617) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_428] (rows=822 width=436) + Group By Operator [GBY_430] (rows=822 width=436) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"],keys:KEY._col0 <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_36] @@ -474,75 +474,75 @@ Stage-0 Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(_col1)","sum(_col2)","sum(_col3)"],keys:_col0 Select Operator [SEL_33] (rows=270716624 width=305) Output:["_col0","_col1","_col2","_col3"] - Merge Join Operator [MERGEJOIN_364] (rows=270716624 width=305) - Conds:RS_30._col2=RS_427._col0(Inner),Output:["_col5","_col6","_col9","_col10","_col15"] + Merge Join Operator [MERGEJOIN_366] (rows=270716624 width=305) + Conds:RS_30._col2=RS_429._col0(Inner),Output:["_col5","_col6","_col9","_col10","_col15"] <-Map 28 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_427] + SHUFFLE [RS_429] PartitionCols:_col0 - Select Operator [SEL_426] (rows=1704 width=104) + Select Operator [SEL_428] (rows=1704 width=104) Output:["_col0","_col1"] - Filter Operator [FIL_425] (rows=1704 width=104) + Filter Operator [FIL_427] (rows=1704 width=104) predicate:s_store_sk is not null TableScan [TS_15] (rows=1704 width=104) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_store_id"] <-Reducer 5 [SIMPLE_EDGE] 
SHUFFLE [RS_30] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_363] (rows=270716624 width=208) - Conds:RS_27._col3=RS_422._col0(Inner),Output:["_col2","_col5","_col6","_col9","_col10"] + Merge Join Operator [MERGEJOIN_365] (rows=270716624 width=208) + Conds:RS_27._col3=RS_424._col0(Inner),Output:["_col2","_col5","_col6","_col9","_col10"] <-Map 27 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_422] + SHUFFLE [RS_424] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_421] + Please refer to the previous Select Operator [SEL_423] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_27] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_362] (rows=270716624 width=212) - Conds:RS_24._col1=RS_417._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col9","_col10"] + Merge Join Operator [MERGEJOIN_364] (rows=270716624 width=212) + Conds:RS_24._col1=RS_419._col0(Inner),Output:["_col2","_col3","_col5","_col6","_col9","_col10"] <-Map 26 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_417] + SHUFFLE [RS_419] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_416] + Please refer to the previous Select Operator [SEL_418] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_361] (rows=812149846 width=370) - Conds:RS_21._col0=RS_395._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col9","_col10"] + Merge Join Operator [MERGEJOIN_363] (rows=812149846 width=370) + Conds:RS_21._col0=RS_397._col0(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col9","_col10"] <-Map 12 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_395] + SHUFFLE [RS_397] PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_394] + Please refer to the previous Select Operator [SEL_396] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_360] (rows=812149846 width=374) - Conds:RS_411._col1, _col4=RS_414._col0, _col1(Left 
Outer),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col9","_col10"] + Merge Join Operator [MERGEJOIN_362] (rows=812149846 width=374) + Conds:RS_413._col1, _col4=RS_416._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col9","_col10"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_411] + SHUFFLE [RS_413] PartitionCols:_col1, _col4 - Select Operator [SEL_410] (rows=501693263 width=233) + Select Operator [SEL_412] (rows=501693263 width=233) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Filter Operator [FIL_409] (rows=501693263 width=233) + Filter Operator [FIL_411] (rows=501693263 width=233) predicate:(ss_sold_date_sk is not null and ss_promo_sk is not null and ss_store_sk is not null and ss_item_sk is not null and ss_sold_date_sk BETWEEN DynamicValue(RS_22_date_dim_d_date_sk_min) AND DynamicValue(RS_22_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_22_date_dim_d_date_sk_bloom_filter))) TableScan [TS_0] (rows=575995635 width=233) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_item_sk","ss_store_sk","ss_promo_sk","ss_ticket_number","ss_ext_sales_price","ss_net_profit"] <-Reducer 13 [BROADCAST_EDGE] vectorized - BROADCAST [RS_408] - Group By Operator [GBY_407] (rows=1 width=12) + BROADCAST [RS_410] + Group By Operator [GBY_409] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 12 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_404] - Group By Operator [GBY_401] (rows=1 width=12) + SHUFFLE [RS_406] + Group By Operator [GBY_403] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_396] (rows=8116 width=4) + Select Operator [SEL_398] (rows=8116 width=4) Output:["_col0"] - Please refer to the previous Select Operator 
[SEL_394] + Please refer to the previous Select Operator [SEL_396] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_414] + SHUFFLE [RS_416] PartitionCols:_col0, _col1 - Select Operator [SEL_413] (rows=57591150 width=224) + Select Operator [SEL_415] (rows=57591150 width=224) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_412] (rows=57591150 width=224) + Filter Operator [FIL_414] (rows=57591150 width=224) predicate:(sr_item_sk is not null and sr_ticket_number is not null) TableScan [TS_3] (rows=57591150 width=224) default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_item_sk","sr_ticket_number","sr_return_amt","sr_net_loss"] diff --git ql/src/test/results/clientpositive/perf/tez/query82.q.out ql/src/test/results/clientpositive/perf/tez/query82.q.out index c7721acffe..07f1943b3d 100644 --- ql/src/test/results/clientpositive/perf/tez/query82.q.out +++ ql/src/test/results/clientpositive/perf/tez/query82.q.out @@ -56,78 +56,78 @@ Stage-0 limit:100 Stage-1 Reducer 5 vectorized - File Output Operator [FS_101] - Limit [LIM_100] (rows=1 width=396) + File Output Operator [FS_106] + Limit [LIM_105] (rows=1 width=396) Number of rows:100 - Select Operator [SEL_99] (rows=1 width=396) + Select Operator [SEL_104] (rows=1 width=396) Output:["_col0","_col1","_col2"] <-Reducer 4 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_98] - Group By Operator [GBY_97] (rows=1 width=396) + SHUFFLE [RS_103] + Group By Operator [GBY_102] (rows=1 width=396) Output:["_col0","_col1","_col2"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_23] (rows=2 width=396) Output:["_col0","_col1","_col2"],keys:_col2, _col3, _col4 - Top N Key Operator [TNK_43] (rows=11627 width=396) + Top N Key Operator [TNK_48] (rows=11627 width=396) keys:_col2, _col3, _col4,top n:100 - Merge Join Operator [MERGEJOIN_79] (rows=11627 width=396) + Merge Join Operator [MERGEJOIN_84] (rows=11627 width=396) 
Conds:RS_19._col1=RS_20._col1(Inner),Output:["_col2","_col3","_col4"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_19] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_77] (rows=3564040 width=400) - Conds:RS_90._col0=RS_82._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + Merge Join Operator [MERGEJOIN_82] (rows=3564040 width=400) + Conds:RS_95._col0=RS_87._col0(Inner),Output:["_col1","_col2","_col3","_col4"] <-Map 6 [SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_82] + PARTITION_ONLY_SHUFFLE [RS_87] PartitionCols:_col0 - Select Operator [SEL_81] (rows=297 width=400) + Select Operator [SEL_86] (rows=297 width=400) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_80] (rows=297 width=404) + Filter Operator [FIL_85] (rows=297 width=404) predicate:(i_current_price BETWEEN 30 AND 60 and (i_manufact_id) IN (437, 129, 727, 663) and i_item_sk is not null) TableScan [TS_3] (rows=462000 width=403) default@item,item,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_item_id","i_item_desc","i_current_price","i_manufact_id"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_90] + SHUFFLE [RS_95] PartitionCols:_col0 - Select Operator [SEL_89] (rows=575995635 width=4) + Select Operator [SEL_94] (rows=575995635 width=4) Output:["_col0"] - Filter Operator [FIL_88] (rows=575995635 width=4) + Filter Operator [FIL_93] (rows=575995635 width=4) predicate:(ss_item_sk is not null and ss_item_sk BETWEEN DynamicValue(RS_17_item_i_item_sk_min) AND DynamicValue(RS_17_item_i_item_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_17_item_i_item_sk_bloom_filter))) TableScan [TS_0] (rows=575995635 width=4) default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk"] <-Reducer 7 [BROADCAST_EDGE] vectorized - BROADCAST [RS_87] - Group By Operator [GBY_86] (rows=1 width=12) + BROADCAST [RS_92] + Group By Operator [GBY_91] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, 
expectedEntries=1000000)"] <-Map 6 [CUSTOM_SIMPLE_EDGE] vectorized - PARTITION_ONLY_SHUFFLE [RS_85] - Group By Operator [GBY_84] (rows=1 width=12) + PARTITION_ONLY_SHUFFLE [RS_90] + Group By Operator [GBY_89] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_83] (rows=297 width=4) + Select Operator [SEL_88] (rows=297 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_81] + Please refer to the previous Select Operator [SEL_86] <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_20] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_78] (rows=1879072 width=4) - Conds:RS_93._col0=RS_96._col0(Inner),Output:["_col1"] + Merge Join Operator [MERGEJOIN_83] (rows=1879072 width=4) + Conds:RS_98._col0=RS_101._col0(Inner),Output:["_col1"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_96] + SHUFFLE [RS_101] PartitionCols:_col0 - Select Operator [SEL_95] (rows=8116 width=4) + Select Operator [SEL_100] (rows=8116 width=4) Output:["_col0"] - Filter Operator [FIL_94] (rows=8116 width=98) + Filter Operator [FIL_99] (rows=8116 width=98) predicate:(CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2002-05-30 00:00:00' AND TIMESTAMP'2002-07-29 00:00:00' and d_date_sk is not null) TableScan [TS_9] (rows=73049 width=98) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_date"] <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_93] + SHUFFLE [RS_98] PartitionCols:_col0 - Select Operator [SEL_92] (rows=16912800 width=8) + Select Operator [SEL_97] (rows=16912800 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_91] (rows=16912800 width=11) + Filter Operator [FIL_96] (rows=16912800 width=11) predicate:(inv_quantity_on_hand BETWEEN 100 AND 500 and inv_item_sk is not null and inv_date_sk is not null) TableScan [TS_6] (rows=37584000 width=11) 
default@inventory,inventory,Tbl:COMPLETE,Col:COMPLETE,Output:["inv_date_sk","inv_item_sk","inv_quantity_on_hand"] diff --git ql/src/test/results/clientpositive/perf/tez/query99.q.out ql/src/test/results/clientpositive/perf/tez/query99.q.out index c01122f435..d24d5ccc60 100644 --- ql/src/test/results/clientpositive/perf/tez/query99.q.out +++ ql/src/test/results/clientpositive/perf/tez/query99.q.out @@ -95,94 +95,94 @@ Stage-0 limit:-1 Stage-1 Reducer 7 vectorized - File Output Operator [FS_126] - Limit [LIM_125] (rows=100 width=590) + File Output Operator [FS_131] + Limit [LIM_130] (rows=100 width=590) Number of rows:100 - Select Operator [SEL_124] (rows=3920468 width=590) + Select Operator [SEL_129] (rows=3920468 width=590) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Reducer 6 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_123] - Select Operator [SEL_122] (rows=3920468 width=590) + SHUFFLE [RS_128] + Select Operator [SEL_127] (rows=3920468 width=590) Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Group By Operator [GBY_121] (rows=3920468 width=406) + Group By Operator [GBY_126] (rows=3920468 width=406) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_29] PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_28] (rows=7840936 width=406) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col4)","sum(_col5)","sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col13, _col15, _col11 - Top N Key Operator [TNK_56] (rows=15681873 width=386) + Top N Key Operator [TNK_59] (rows=15681873 width=386) keys:_col13, _col15, _col11,top n:100 - Merge Join Operator [MERGEJOIN_100] (rows=15681873 width=386) - 
Conds:RS_24._col2=RS_103._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col8","_col11","_col13","_col15"] + Merge Join Operator [MERGEJOIN_105] (rows=15681873 width=386) + Conds:RS_24._col2=RS_108._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col8","_col11","_col13","_col15"] <-Map 11 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_103] + SHUFFLE [RS_108] PartitionCols:_col0 - Select Operator [SEL_102] (rows=1 width=88) + Select Operator [SEL_107] (rows=1 width=88) Output:["_col0","_col1"] - Filter Operator [FIL_101] (rows=1 width=88) + Filter Operator [FIL_106] (rows=1 width=88) predicate:sm_ship_mode_sk is not null TableScan [TS_12] (rows=1 width=88) default@ship_mode,ship_mode,Tbl:COMPLETE,Col:COMPLETE,Output:["sm_ship_mode_sk","sm_type"] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_99] (rows=282273729 width=305) - Conds:RS_21._col3=RS_120._col0(Inner),Output:["_col2","_col4","_col5","_col6","_col7","_col8","_col11","_col13"] + Merge Join Operator [MERGEJOIN_104] (rows=282273729 width=305) + Conds:RS_21._col3=RS_125._col0(Inner),Output:["_col2","_col4","_col5","_col6","_col7","_col8","_col11","_col13"] <-Map 10 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_120] + SHUFFLE [RS_125] PartitionCols:_col0 - Select Operator [SEL_119] (rows=27 width=188) + Select Operator [SEL_124] (rows=27 width=188) Output:["_col0","_col1"] - Filter Operator [FIL_118] (rows=27 width=104) + Filter Operator [FIL_123] (rows=27 width=104) predicate:w_warehouse_sk is not null TableScan [TS_9] (rows=27 width=104) default@warehouse,warehouse,Tbl:COMPLETE,Col:COMPLETE,Output:["w_warehouse_sk","w_warehouse_name"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_98] (rows=282273729 width=125) - Conds:RS_18._col1=RS_117._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col11"] + Merge Join Operator [MERGEJOIN_103] (rows=282273729 width=125) + 
Conds:RS_18._col1=RS_122._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col11"] <-Map 9 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_117] + SHUFFLE [RS_122] PartitionCols:_col0 - Select Operator [SEL_116] (rows=60 width=102) + Select Operator [SEL_121] (rows=60 width=102) Output:["_col0","_col1"] - Filter Operator [FIL_115] (rows=60 width=102) + Filter Operator [FIL_120] (rows=60 width=102) predicate:cc_call_center_sk is not null TableScan [TS_6] (rows=60 width=102) default@call_center,call_center,Tbl:COMPLETE,Col:COMPLETE,Output:["cc_call_center_sk","cc_name"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_97] (rows=282273729 width=31) - Conds:RS_111._col0=RS_114._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] + Merge Join Operator [MERGEJOIN_102] (rows=282273729 width=31) + Conds:RS_116._col0=RS_119._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] <-Map 1 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_111] + SHUFFLE [RS_116] PartitionCols:_col0 - Select Operator [SEL_110] (rows=282273729 width=35) + Select Operator [SEL_115] (rows=282273729 width=35) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Filter Operator [FIL_109] (rows=282273729 width=19) + Filter Operator [FIL_114] (rows=282273729 width=19) predicate:(cs_warehouse_sk is not null and cs_ship_date_sk is not null and cs_ship_mode_sk is not null and cs_call_center_sk is not null and cs_ship_mode_sk BETWEEN DynamicValue(RS_25_ship_mode_sm_ship_mode_sk_min) AND DynamicValue(RS_25_ship_mode_sm_ship_mode_sk_max) and in_bloom_filter(cs_ship_mode_sk, DynamicValue(RS_25_ship_mode_sm_ship_mode_sk_bloom_filter))) TableScan [TS_0] (rows=287989836 width=19) default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_ship_date_sk","cs_call_center_sk","cs_ship_mode_sk","cs_warehouse_sk"] <-Reducer 12 
[BROADCAST_EDGE] vectorized - BROADCAST [RS_108] - Group By Operator [GBY_107] (rows=1 width=12) + BROADCAST [RS_113] + Group By Operator [GBY_112] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"] <-Map 11 [CUSTOM_SIMPLE_EDGE] vectorized - SHUFFLE [RS_106] - Group By Operator [GBY_105] (rows=1 width=12) + SHUFFLE [RS_111] + Group By Operator [GBY_110] (rows=1 width=12) Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"] - Select Operator [SEL_104] (rows=1 width=4) + Select Operator [SEL_109] (rows=1 width=4) Output:["_col0"] - Please refer to the previous Select Operator [SEL_102] + Please refer to the previous Select Operator [SEL_107] <-Map 8 [SIMPLE_EDGE] vectorized - SHUFFLE [RS_114] + SHUFFLE [RS_119] PartitionCols:_col0 - Select Operator [SEL_113] (rows=317 width=4) + Select Operator [SEL_118] (rows=317 width=4) Output:["_col0"] - Filter Operator [FIL_112] (rows=317 width=8) + Filter Operator [FIL_117] (rows=317 width=8) predicate:(d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) TableScan [TS_3] (rows=73049 width=8) default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] diff --git ql/src/test/results/clientpositive/tez/topnkey.q.out ql/src/test/results/clientpositive/tez/topnkey.q.out index cf2ecf7133..4b84f665f7 100644 --- ql/src/test/results/clientpositive/tez/topnkey.q.out +++ ql/src/test/results/clientpositive/tez/topnkey.q.out @@ -1,178 +1,58 @@ -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: EXPLAIN SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: EXPLAIN SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 POSTHOOK: 
type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: hdfs://### HDFS PATH ### -OPTIMIZED SQL: SELECT `key` AS `$f0`, SUM(CAST(SUBSTR(`value`, 5) AS INTEGER)) AS `$f1` -FROM `default`.`src` -GROUP BY `key` -ORDER BY `key` -LIMIT 5 -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 +Plan optimized by CBO. -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false - Select Operator - expressions: key (type: string), UDFToInteger(substr(value, 5)) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: _col0 (type: string) - null sort order: z - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - top n: 5 - Group By Operator - aggregations: sum(_col1) - keys: _col0 (type: string) - minReductionHashAggr: 0.5 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - TopN: 5 - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) - auto parallelism: true - Path -> Alias: - hdfs://### HDFS PATH ### [src] - Path -> Partition: - hdfs://### HDFS PATH ### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - 
properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - location hdfs://### HDFS PATH ### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - location hdfs://### HDFS PATH ### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [src] - Reducer 2 - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Statistics: Num rows: 250 Data size: 23750 Basic stats: 
COMPLETE Column stats: COMPLETE - tag: -1 - TopN: 5 - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) - auto parallelism: false - Reducer 3 - Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - GlobalTableId: 0 - directory: hdfs://### HDFS PATH ### - NumFilesPerFileSink: 1 - Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE - Stats Publishing Key Prefix: hdfs://### HDFS PATH ### - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types string:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Stage: Stage-0 - Fetch Operator - limit: 5 - Processor Tree: - ListSink +Stage-0 + Fetch Operator + limit:5 + Stage-1 + Reducer 3 + File Output Operator [FS_10] + Limit [LIM_9] (rows=5 width=95) + Number of rows:5 + Select Operator [SEL_8] (rows=250 width=95) + Output:["_col0","_col1"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_7] + Group By Operator [GBY_5] (rows=250 width=95) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0 + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_4] + PartitionCols:_col0 + Group By Operator 
[GBY_3] (rows=250 width=95) + Output:["_col0","_col1"],aggregations:["sum(_col1)"],keys:_col0 + Select Operator [SEL_1] (rows=500 width=178) + Output:["_col0","_col1"] + Top N Key Operator [TNK_13] + keys:key,top n:5 + TableScan [TS_0] (rows=500 width=178) + default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] +PREHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: hdfs://### HDFS PATH ### +0 0 +10 10 +100 200 +103 206 +104 208 PREHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -187,180 +67,335 @@ POSTHOOK: Output: hdfs://### HDFS PATH ### 103 206 104 208 PREHOOK: query: EXPLAIN -SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: EXPLAIN -SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: hdfs://### HDFS PATH ### Plan optimized by CBO. 
Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:5 Stage-1 - Reducer 3 - File Output Operator [FS_9] - Limit [LIM_8] (rows=5 width=87) + Reducer 4 + File Output Operator [FS_16] + Limit [LIM_15] (rows=5 width=178) Number of rows:5 - Select Operator [SEL_7] (rows=250 width=87) - Output:["_col0"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_6] - Group By Operator [GBY_4] (rows=250 width=87) - Output:["_col0"],keys:KEY._col0 - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_3] - PartitionCols:_col0 - Group By Operator [GBY_2] (rows=250 width=87) - Output:["_col0"],keys:key - Top N Key Operator [TNK_10] (rows=500 width=87) - keys:key,top n:5 - Select Operator [SEL_1] (rows=500 width=87) - Output:["key"] - TableScan [TS_0] (rows=500 width=87) - default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + Select Operator [SEL_14] (rows=395 width=178) + Output:["_col0","_col1"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_13] + Group By Operator [GBY_11] (rows=395 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_10] + PartitionCols:_col0, _col1 + Group By Operator [GBY_9] (rows=395 width=178) + Output:["_col0","_col1"],keys:_col0, _col2 + Top N Key Operator [TNK_27] + keys:_col0, _col2,top n:5 + Top N Key Operator [TNK_30] + keys:_col0,top n:5 + Merge Join Operator [MERGEJOIN_26] (rows=791 width=178) + Conds:RS_5._col0=RS_6._col0(Left Outer),Output:["_col0","_col2"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_5] + PartitionCols:_col0 + Select Operator [SEL_1] (rows=500 width=87) + Output:["_col0"] + TableScan [TS_0] (rows=500 width=87) + default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Map 5 [SIMPLE_EDGE] + SHUFFLE [RS_6] + PartitionCols:_col0 + Select Operator [SEL_4] (rows=500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_18] (rows=500 
width=178) + predicate:key is not null + TableScan [TS_2] (rows=500 width=178) + default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] -PREHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: hdfs://### HDFS PATH ### -0 -10 -100 -103 -104 -PREHOOK: query: explain vectorization detail -SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: explain vectorization detail -SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: hdfs://### HDFS PATH ### -PLAN VECTORIZATION: - enabled: false - enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] - -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 +0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: EXPLAIN +SELECT 
src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key NULLS FIRST LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: EXPLAIN +SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key NULLS FIRST LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. -STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: src1 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Map 4 - Map Operator Tree: - TableScan - alias: src2 - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - 
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Reducer 2 - Reduce Operator Tree: - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: string) - Reducer 3 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) - Stage: 
Stage-0 - Fetch Operator - limit: 5 - Processor Tree: - ListSink +Stage-0 + Fetch Operator + limit:5 + Stage-1 + Reducer 4 + File Output Operator [FS_16] + Limit [LIM_15] (rows=5 width=178) + Number of rows:5 + Select Operator [SEL_14] (rows=395 width=178) + Output:["_col0","_col1"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_13] + Group By Operator [GBY_11] (rows=395 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_10] + PartitionCols:_col0, _col1 + Group By Operator [GBY_9] (rows=395 width=178) + Output:["_col0","_col1"],keys:_col0, _col2 + Top N Key Operator [TNK_27] + keys:_col0, _col2,top n:5 + Merge Join Operator [MERGEJOIN_26] (rows=791 width=178) + Conds:RS_5._col0=RS_6._col0(Left Outer),Output:["_col0","_col2"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_5] + PartitionCols:_col0 + Select Operator [SEL_1] (rows=500 width=87) + Output:["_col0"] + Top N Key Operator [TNK_28] + keys:key,top n:5 + TableScan [TS_0] (rows=500 width=87) + default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Map 5 [SIMPLE_EDGE] + SHUFFLE [RS_6] + PartitionCols:_col0 + Select Operator [SEL_4] (rows=500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_18] (rows=500 width=178) + predicate:key is not null + TableScan [TS_2] (rows=500 width=178) + default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] -PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key NULLS FIRST LIMIT 5 PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: hdfs://### HDFS PATH ### -POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = 
src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key NULLS FIRST LIMIT 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: hdfs://### HDFS PATH ### 0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key NULLS FIRST LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key NULLS FIRST LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: hdfs://### HDFS PATH ### 0 val_0 -0 val_0 -0 val_0 -0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: CREATE TABLE t_test( + a int, + b int, + c int +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t_test +POSTHOOK: query: CREATE TABLE t_test( + a int, + b int, + c int +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t_test +PREHOOK: query: INSERT INTO t_test VALUES +(5, 2, 3), +(6, 2, 1), +(7, 8, 4), (7, 8, 4), (7, 8, 4), +(5, 1, 2), (5, 1, 2), (5, 1, 2) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t_test +POSTHOOK: query: INSERT INTO t_test VALUES +(5, 2, 3), +(6, 2, 1), +(7, 8, 4), (7, 8, 4), (7, 8, 4), +(5, 1, 2), (5, 1, 2), (5, 1, 2) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t_test +POSTHOOK: Lineage: t_test.a SCRIPT [] +POSTHOOK: Lineage: t_test.b SCRIPT [] +POSTHOOK: Lineage: t_test.c SCRIPT [] +PREHOOK: query: EXPLAIN +SELECT a, b FROM t_test ORDER BY a, b LIMIT 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: EXPLAIN +SELECT a, b FROM 
t_test ORDER BY a, b LIMIT 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:3 + Stage-1 + Reducer 2 + File Output Operator [FS_5] + Limit [LIM_4] (rows=3 width=8) + Number of rows:3 + Select Operator [SEL_3] (rows=8 width=8) + Output:["_col0","_col1"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=8 width=8) + Output:["_col0","_col1"] + Top N Key Operator [TNK_6] (rows=8 width=8) + keys:a, b,top n:3 + TableScan [TS_0] (rows=8 width=8) + default@t_test,t_test,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b"] + +PREHOOK: query: SELECT a, b FROM t_test ORDER BY a, b LIMIT 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT a, b FROM t_test ORDER BY a, b LIMIT 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +POSTHOOK: Output: hdfs://### HDFS PATH ### +5 1 +5 1 +5 1 +PREHOOK: query: SELECT a, b FROM t_test ORDER BY a, b LIMIT 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT a, b FROM t_test ORDER BY a, b LIMIT 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +POSTHOOK: Output: hdfs://### HDFS PATH ### +5 1 +5 1 +5 1 +PREHOOK: query: EXPLAIN +SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: EXPLAIN +SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +POSTHOOK: Output: hdfs://### HDFS PATH ### +Plan optimized by CBO. 
+ +Vertex dependency in root stage +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:3 + Stage-1 + Reducer 3 + File Output Operator [FS_9] + Limit [LIM_8] (rows=3 width=8) + Number of rows:3 + Select Operator [SEL_7] (rows=4 width=8) + Output:["_col0","_col1"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_6] + Group By Operator [GBY_4] (rows=4 width=8) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_3] + PartitionCols:_col0, _col1 + Group By Operator [GBY_2] (rows=4 width=8) + Output:["_col0","_col1"],keys:a, b + Select Operator [SEL_1] (rows=8 width=8) + Output:["a","b"] + Top N Key Operator [TNK_12] + keys:a, b,top n:3 + TableScan [TS_0] (rows=8 width=8) + default@t_test,t_test,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b"] + +PREHOOK: query: SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +POSTHOOK: Output: hdfs://### HDFS PATH ### +5 1 +5 2 +6 2 +PREHOOK: query: SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +POSTHOOK: Output: hdfs://### HDFS PATH ### +5 1 +5 2 +6 2 +PREHOOK: query: DROP TABLE t_test +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t_test +PREHOOK: Output: default@t_test +POSTHOOK: query: DROP TABLE t_test +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t_test +POSTHOOK: Output: default@t_test diff --git ql/src/test/results/clientpositive/tez/vector_topnkey.q.out ql/src/test/results/clientpositive/tez/vector_topnkey.q.out index d179013e28..03362eeb38 100644 --- 
ql/src/test/results/clientpositive/tez/vector_topnkey.q.out +++ ql/src/test/results/clientpositive/tez/vector_topnkey.q.out @@ -87,24 +87,23 @@ STAGE PLANS: TableScan Vectorization: native: true vectorizationSchemaColumns: [0:cint1:int, 1:cint2:int, 2:cdouble:double, 3:cvarchar:varchar(50), 4:cdecimal1:decimal(10,2)/DECIMAL_64, 5:cdecimal2:decimal(38,5), 6:ROW__ID:struct] - Select Operator - expressions: cint1 (type: int) - outputColumnNames: cint1 - Select Vectorization: - className: VectorSelectOperator + Top N Key Operator + sort order: + + keys: cint1 (type: int) + null sort order: z + top n: 3 + Top N Key Vectorization: + className: VectorTopNKeyOperator + keyExpressions: col 0:int native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 14 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: + - keys: cint1 (type: int) - null sort order: z - Statistics: Num rows: 14 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE - top n: 3 - Top N Key Vectorization: - className: VectorTopNKeyOperator - keyExpressions: col 0:int + Select Operator + expressions: cint1 (type: int) + outputColumnNames: cint1 + Select Vectorization: + className: VectorSelectOperator native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 14 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: className: VectorGroupByOperator @@ -117,7 +116,7 @@ STAGE PLANS: minReductionHashAggr: 0.64285713 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z @@ -128,7 +127,7 @@ STAGE PLANS: keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS 
true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 5 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized Map Vectorization: @@ -173,7 +172,7 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z @@ -183,7 +182,7 @@ STAGE PLANS: keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 5 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized @@ -208,7 +207,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 5 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 Limit Vectorization: diff --git ql/src/test/results/clientpositive/topnkey.q.out ql/src/test/results/clientpositive/topnkey.q.out index cecbe89b1c..6d4a822588 100644 --- ql/src/test/results/clientpositive/topnkey.q.out +++ ql/src/test/results/clientpositive/topnkey.q.out @@ -1,18 +1,13 @@ -PREHOOK: query: EXPLAIN EXTENDED +PREHOOK: query: EXPLAIN SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 
PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: EXPLAIN EXTENDED +POSTHOOK: query: EXPLAIN SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -OPTIMIZED SQL: SELECT `key` AS `$f0`, SUM(CAST(SUBSTR(`value`, 5) AS INTEGER)) AS `$f1` -FROM `default`.`src` -GROUP BY `key` -ORDER BY `key` -LIMIT 5 STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 @@ -25,7 +20,6 @@ STAGE PLANS: TableScan alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - GatherStats: false Select Operator expressions: key (type: string), UDFToInteger(substr(value, 5)) (type: int) outputColumnNames: _col0, _col1 @@ -43,65 +37,8 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - TopN: 5 TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} - bucket_count -1 - bucketing_version 2 - column.name.delimiter , - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [$hdt$_0:src] - Needs Tagging: false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -111,66 +48,22 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - column.name.delimiter , - columns _col0,_col1 - columns.types string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-2 Map Reduce Map Operator Tree: TableScan - GatherStats: false Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - TopN: 5 TopN Hash Memory Usage: 
0.1 value expressions: _col1 (type: bigint) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -mr-10004 - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - column.name.delimiter , - columns _col0,_col1 - columns.types string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - column.name.delimiter , - columns _col0,_col1 - columns.types string,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Truncated Path -> Alias: -#### A masked pattern was here #### - Needs Tagging: false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) @@ -181,26 +74,11 @@ STAGE PLANS: Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 Statistics: Num rows: 5 Data size: 475 Basic stats: COMPLETE Column stats: COMPLETE -#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types string:bigint - escape.delim \ - hive.serialization.extend.additional.nesting.levels true - serialization.escape.crlf true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Stage: Stage-0 Fetch Operator @@ -208,6 +86,19 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0 +10 10 +100 200 +103 206 +104 208 PREHOOK: query: SELECT key, SUM(CAST(SUBSTR(value,5) AS INT)) FROM src GROUP BY key ORDER BY key LIMIT 5 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -222,50 +113,95 @@ POSTHOOK: Input: default@src 103 206 104 208 PREHOOK: query: EXPLAIN -SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### POSTHOOK: query: EXPLAIN -SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: src + alias: src1 Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) - outputColumnNames: key + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 43500 
Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: key (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + alias: src2 + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) - null sort order: z + null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col2 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: za + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -273,7 +209,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -281,19 +217,20 @@ STAGE PLANS: key expressions: _col0 (type: string) null sort order: z sort order: + - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE 
Column stats: COMPLETE Limit Number of rows: 5 - Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 435 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -305,37 +242,47 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT key FROM src GROUP BY key ORDER BY key LIMIT 5 +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -0 -10 -100 -103 -104 -PREHOOK: query: explain vectorization detail -SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: explain vectorization detail -SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src 
src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: EXPLAIN +SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key NULLS FIRST LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key NULLS FIRST LIMIT 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -PLAN VECTORIZATION: - enabled: false - enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -343,21 +290,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src1 - filterExpr: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition 
columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: src2 filterExpr: key is not null (type: boolean) @@ -379,16 +322,18 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Inner Join 0 to 1 + Left Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col2 Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col2 (type: string) + Group By Operator + keys: _col0 (type: string), _col2 (type: string) + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -397,21 +342,45 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan Reduce Output 
Operator key expressions: _col0 (type: string) - null sort order: z + null sort order: a sort order: + - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 395 Data size: 70310 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 5 Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE @@ -429,16 +398,253 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key NULLS FIRST LIMIT 5 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) ORDER BY src1.key LIMIT 5 +POSTHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key NULLS FIRST LIMIT 5 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### 0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: SELECT src1.key, src2.value FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key NULLS FIRST LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT src1.key, src2.value FROM 
src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key) GROUP BY src1.key, src2.value ORDER BY src1.key NULLS FIRST LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### 0 val_0 -0 val_0 -0 val_0 -0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +PREHOOK: query: CREATE TABLE t_test( + a int, + b int, + c int +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t_test +POSTHOOK: query: CREATE TABLE t_test( + a int, + b int, + c int +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t_test +PREHOOK: query: INSERT INTO t_test VALUES +(5, 2, 3), +(6, 2, 1), +(7, 8, 4), (7, 8, 4), (7, 8, 4), +(5, 1, 2), (5, 1, 2), (5, 1, 2) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t_test +POSTHOOK: query: INSERT INTO t_test VALUES +(5, 2, 3), +(6, 2, 1), +(7, 8, 4), (7, 8, 4), (7, 8, 4), +(5, 1, 2), (5, 1, 2), (5, 1, 2) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t_test +POSTHOOK: Lineage: t_test.a SCRIPT [] +POSTHOOK: Lineage: t_test.b SCRIPT [] +POSTHOOK: Lineage: t_test.c SCRIPT [] +PREHOOK: query: EXPLAIN +SELECT a, b FROM t_test ORDER BY a, b LIMIT 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT a, b FROM t_test ORDER BY a, b LIMIT 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t_test + Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: a (type: int), b (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + 
Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + null sort order: zz + sort order: ++ + Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 3 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b FROM t_test ORDER BY a, b LIMIT 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b FROM t_test ORDER BY a, b LIMIT 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +#### A masked pattern was here #### +5 1 +5 1 +5 1 +PREHOOK: query: SELECT a, b FROM t_test ORDER BY a, b LIMIT 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b FROM t_test ORDER BY a, b LIMIT 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +#### A masked pattern was here #### +5 1 +5 1 +5 1 +PREHOOK: query: EXPLAIN +SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +#### A masked pattern was 
here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t_test + Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: a (type: int), b (type: int) + outputColumnNames: a, b + Statistics: Num rows: 8 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: a (type: int), b (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + null sort order: zz + sort order: ++ + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 32 
Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 3 + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 3 + Processor Tree: + ListSink + +PREHOOK: query: SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +#### A masked pattern was here #### +5 1 +5 2 +6 2 +PREHOOK: query: SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t_test +#### A masked pattern was here #### +POSTHOOK: query: SELECT a, b FROM t_test GROUP BY a, b ORDER BY a, b LIMIT 3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t_test +#### A masked pattern was here #### +5 1 +5 2 +6 2 +PREHOOK: query: DROP TABLE t_test +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t_test +PREHOOK: Output: default@t_test +POSTHOOK: query: DROP TABLE t_test +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t_test +POSTHOOK: Output: default@t_test