diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java index 3af1cbc..b1c2378 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java @@ -32,6 +32,7 @@ import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; +import java.util.LinkedHashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; @@ -371,7 +372,13 @@ private static String indentString(int indent) { private JSONObject outputMap(Map mp, boolean hasHeader, PrintStream out, boolean extended, boolean jsonOutput, int indent) throws Exception { - TreeMap tree = new TreeMap(); + Map tree; + // let's honor the original order if mp is a LinkedHashMap + if (mp instanceof LinkedHashMap) { + tree = new LinkedHashMap(); + } else { + tree = new TreeMap(); + } tree.putAll(mp); JSONObject json = jsonOutput ? new JSONObject() : null; if (out != null && hasHeader && !mp.isEmpty()) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java index b2c6cf3..11f4236 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java @@ -18,23 +18,15 @@ package org.apache.hadoop.hive.ql.exec.spark; -import java.util.ArrayList; import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedList; import java.util.List; import java.util.Map; -import java.util.Queue; -import java.util.Set; import com.google.common.base.Preconditions; import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.ql.exec.Operator; -import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; import org.apache.hadoop.hive.ql.io.merge.MergeFileMapper; import org.apache.hadoop.hive.ql.io.merge.MergeFileOutputFormat; import org.apache.hadoop.hive.ql.io.merge.MergeFileWork; @@ -70,7 +62,7 @@ private Context context; private Path scratchDir; private SparkReporter sparkReporter; - private final Map cloneToWork; + private Map cloneToWork; private final Map workToTranMap; private final Map workToParentWorkTranMap; @@ -85,7 +77,6 @@ public SparkPlanGenerator( this.context = context; this.jobConf = jobConf; this.scratchDir = scratchDir; - this.cloneToWork = new HashMap(); this.workToTranMap = new HashMap(); this.workToParentWorkTranMap = new HashMap(); this.sparkReporter = sparkReporter; @@ -93,12 +84,10 @@ public SparkPlanGenerator( public SparkPlan generate(SparkWork sparkWork) throws Exception { SparkPlan sparkPlan = new SparkPlan(); - cloneToWork.clear(); + cloneToWork = sparkWork.getCloneToWork(); workToTranMap.clear(); workToParentWorkTranMap.clear(); - splitSparkWork(sparkWork); - for (BaseWork work : sparkWork.getAllWork()) { SparkTran tran; if (work instanceof MapWork) { @@ -159,105 +148,6 @@ private SparkTran generateParentTran(SparkPlan sparkPlan, SparkWork sparkWork, B return result; } - - private void splitSparkWork(SparkWork sparkWork) { - // do a BFS on the sparkWork graph, and look for any work that has more than one child. - // If we found such a work, we split it into multiple ones, one for each of its child. - Queue queue = new LinkedList(); - Set visited = new HashSet(); - queue.addAll(sparkWork.getRoots()); - while (!queue.isEmpty()) { - BaseWork work = queue.poll(); - if (!visited.add(work)) { - continue; - } - - List childWorks = sparkWork.getChildren(work); - // First, add all children of this work into queue, to be processed later. - for (BaseWork w : childWorks) { - queue.add(w); - } - - // Second, check if this work has multiple reduceSinks. If so, do split. - splitBaseWork(sparkWork, work, childWorks); - } - } - - private Set> getAllReduceSinks(BaseWork work) { - Set> resultSet = work.getAllLeafOperators(); - Iterator> it = resultSet.iterator(); - while (it.hasNext()) { - if (!(it.next() instanceof ReduceSinkOperator)) { - it.remove(); - } - } - return resultSet; - } - - // Split work into multiple branches, one for each childWork in childWorks. - // It also set up the connection between each parent work and child work. - private void splitBaseWork(SparkWork sparkWork, BaseWork parentWork, List childWorks) { - if (getAllReduceSinks(parentWork).size() <= 1) { - return; - } - - // Grand-parent works - we need to set these to be the parents of the cloned works. - List grandParentWorks = sparkWork.getParents(parentWork); - boolean isFirst = true; - - for (BaseWork childWork : childWorks) { - BaseWork clonedParentWork = Utilities.cloneBaseWork(parentWork); - String childReducerName = childWork.getName(); - SparkEdgeProperty clonedEdgeProperty = sparkWork.getEdgeProperty(parentWork, childWork); - - // We need to remove those branches that - // 1, ended with a ReduceSinkOperator, and - // 2, the ReduceSinkOperator's name is not the same as childReducerName. - // Also, if the cloned work is not the first, we remove ALL leaf operators except - // the corresponding ReduceSinkOperator. - for (Operator op : clonedParentWork.getAllLeafOperators()) { - if (op instanceof ReduceSinkOperator) { - if (!((ReduceSinkOperator)op).getConf().getOutputName().equals(childReducerName)) { - removeOpRecursive(op); - } - } else if (!isFirst) { - removeOpRecursive(op); - } - } - - isFirst = false; - - // Then, we need to set up the graph connection. Especially: - // 1, we need to connect this cloned parent work with all the grand-parent works. - // 2, we need to connect this cloned parent work with the corresponding child work. - sparkWork.add(clonedParentWork); - for (BaseWork gpw : grandParentWorks) { - sparkWork.connect(gpw, clonedParentWork, sparkWork.getEdgeProperty(gpw, parentWork)); - } - sparkWork.connect(clonedParentWork, childWork, clonedEdgeProperty); - cloneToWork.put(clonedParentWork, parentWork); - } - - sparkWork.remove(parentWork); - } - - // Remove op from all its parents' child list. - // Recursively remove any of its parent who only have this op as child. - private void removeOpRecursive(Operator operator) { - List> parentOperators = new ArrayList>(); - for (Operator op : operator.getParentOperators()) { - parentOperators.add(op); - } - for (Operator parentOperator : parentOperators) { - Preconditions.checkArgument(parentOperator.getChildOperators().contains(operator), - "AssertionError: parent of " + operator.getName() + " doesn't have it as child."); - parentOperator.removeChild(operator); - if (parentOperator.getNumChild() == 0) { - removeOpRecursive(parentOperator); - } - } - } - private Class getInputFormat(JobConf jobConf, MapWork mWork) throws HiveException { // MergeFileWork is sub-class of MapWork, we don't need to distinguish here if (mWork.getInputformat() != null) { diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SplitSparkWorkResolver.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SplitSparkWorkResolver.java new file mode 100644 index 0000000..67dda02 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SplitSparkWorkResolver.java @@ -0,0 +1,185 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.optimizer.spark; + +import com.google.common.base.Preconditions; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; +import org.apache.hadoop.hive.ql.exec.Task; +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.exec.spark.SparkTask; +import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext; +import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalPlanResolver; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.parse.spark.GenSparkUtils; +import org.apache.hadoop.hive.ql.plan.BaseWork; +import org.apache.hadoop.hive.ql.plan.MapWork; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.ReduceWork; +import org.apache.hadoop.hive.ql.plan.SparkEdgeProperty; +import org.apache.hadoop.hive.ql.plan.SparkWork; + +import java.io.Serializable; +import java.util.*; + +/** + * Do a BFS on the sparkWork graph, and look for any work that has more than one child. + * If we found such a work, we split it into multiple ones, one for each of its child. + */ +public class SplitSparkWorkResolver implements PhysicalPlanResolver { + @Override + public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException { + for (Task task : pctx.getRootTasks()) { + if (task instanceof SparkTask) { + splitSparkWork(((SparkTask) task).getWork()); + } + } + return pctx; + } + + private void splitSparkWork(SparkWork sparkWork) { + Queue queue = new LinkedList(); + Set visited = new HashSet(); + queue.addAll(sparkWork.getRoots()); + while (!queue.isEmpty()) { + BaseWork work = queue.poll(); + if (!visited.add(work)) { + continue; + } + + List childWorks = sparkWork.getChildren(work); + // First, add all children of this work into queue, to be processed later. + for (BaseWork w : childWorks) { + queue.add(w); + } + + // Second, check if this work has multiple reduceSinks. If so, do split. + splitBaseWork(sparkWork, work, childWorks); + } + } + + // Split work into multiple branches, one for each childWork in childWorks. + // It also set up the connection between each parent work and child work. + private void splitBaseWork(SparkWork sparkWork, BaseWork parentWork, List childWorks) { + if (getAllReduceSinks(parentWork).size() <= 1) { + return; + } + + // Grand-parent works - we need to set these to be the parents of the cloned works. + List grandParentWorks = sparkWork.getParents(parentWork); + boolean isFirst = true; + + for (BaseWork childWork : childWorks) { + BaseWork clonedParentWork = Utilities.cloneBaseWork(parentWork); + // give the cloned work a different name + clonedParentWork.setName(clonedParentWork.getName().replaceAll("^([a-zA-Z]+)(\\s+)(\\d+)", + "$1$2" + GenSparkUtils.getUtils().getNextSeqNumber())); + setStatistics(parentWork, clonedParentWork); + String childReducerName = childWork.getName(); + SparkEdgeProperty clonedEdgeProperty = sparkWork.getEdgeProperty(parentWork, childWork); + + // We need to remove those branches that + // 1, ended with a ReduceSinkOperator, and + // 2, the ReduceSinkOperator's name is not the same as childReducerName. + // Also, if the cloned work is not the first, we remove ALL leaf operators except + // the corresponding ReduceSinkOperator. + for (Operator op : clonedParentWork.getAllLeafOperators()) { + if (op instanceof ReduceSinkOperator) { + if (!((ReduceSinkOperator) op).getConf().getOutputName().equals(childReducerName)) { + removeOpRecursive(op); + } + } else if (!isFirst) { + removeOpRecursive(op); + } + } + + isFirst = false; + + // Then, we need to set up the graph connection. Especially: + // 1, we need to connect this cloned parent work with all the grand-parent works. + // 2, we need to connect this cloned parent work with the corresponding child work. + sparkWork.add(clonedParentWork); + for (BaseWork gpw : grandParentWorks) { + sparkWork.connect(gpw, clonedParentWork, sparkWork.getEdgeProperty(gpw, parentWork)); + } + sparkWork.connect(clonedParentWork, childWork, clonedEdgeProperty); + sparkWork.getCloneToWork().put(clonedParentWork, parentWork); + } + + sparkWork.remove(parentWork); + } + + private Set> getAllReduceSinks(BaseWork work) { + Set> resultSet = work.getAllLeafOperators(); + Iterator> it = resultSet.iterator(); + while (it.hasNext()) { + if (!(it.next() instanceof ReduceSinkOperator)) { + it.remove(); + } + } + return resultSet; + } + + // Remove op from all its parents' child list. + // Recursively remove any of its parent who only have this op as child. + private void removeOpRecursive(Operator operator) { + List> parentOperators = new ArrayList>(); + for (Operator op : operator.getParentOperators()) { + parentOperators.add(op); + } + for (Operator parentOperator : parentOperators) { + Preconditions.checkArgument(parentOperator.getChildOperators().contains(operator), + "AssertionError: parent of " + operator.getName() + " doesn't have it as child."); + parentOperator.removeChild(operator); + if (parentOperator.getNumChild() == 0) { + removeOpRecursive(parentOperator); + } + } + } + + // we lost statistics & opTraits through cloning, try to get them back + // TODO: make sure this method is sufficient to solve the problem + private void setStatistics(BaseWork origin, BaseWork clone) { + if (origin instanceof MapWork && clone instanceof MapWork) { + MapWork originMW = (MapWork) origin; + MapWork cloneMW = (MapWork) clone; + for (Map.Entry> entry : + originMW.getAliasToWork().entrySet()) { + String alias = entry.getKey(); + Operator cloneOP = cloneMW.getAliasToWork().get(alias); + if (cloneOP != null) { + setStatistics(entry.getValue(), cloneOP); + } + } + } else if (origin instanceof ReduceWork && clone instanceof ReduceWork) { + setStatistics(((ReduceWork) origin).getReducer(), ((ReduceWork) clone).getReducer()); + } + } + + private void setStatistics(Operator origin, + Operator clone) { + clone.getConf().setStatistics(origin.getConf().getStatistics()); + clone.getConf().setOpTraits(origin.getConf().getOpTraits()); + if (origin.getChildOperators().size() == clone.getChildOperators().size()) { + for (int i = 0; i < clone.getChildOperators().size(); i++) { + setStatistics(origin.getChildOperators().get(i), clone.getChildOperators().get(i)); + } + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java index f6ca068..5a01ffa 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java @@ -410,4 +410,8 @@ private static boolean isSame(List list1, List list2 } return null; } + + public synchronized int getNextSeqNumber() { + return ++sequenceNumber; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java index 11e711e..a76cac5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java @@ -62,6 +62,7 @@ import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer; import org.apache.hadoop.hive.ql.optimizer.spark.SetSparkReducerParallelism; import org.apache.hadoop.hive.ql.optimizer.spark.SparkSortMergeJoinFactory; +import org.apache.hadoop.hive.ql.optimizer.spark.SplitSparkWorkResolver; import org.apache.hadoop.hive.ql.parse.GlobalLimitCtx; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; @@ -262,6 +263,8 @@ protected void optimizeTaskPlan(List> rootTasks, Pa PhysicalContext physicalCtx = new PhysicalContext(conf, pCtx, pCtx.getContext(), rootTasks, pCtx.getFetchTask()); + physicalCtx = new SplitSparkWorkResolver().resolve(physicalCtx); + if (conf.getBoolVar(HiveConf.ConfVars.HIVENULLSCANOPTIMIZE)) { physicalCtx = new NullScanOptimizer().resolve(physicalCtx); } else { diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java index 351d533..d2a1c0f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java @@ -49,15 +49,20 @@ private final Set roots = new HashSet(); private final Set leaves = new HashSet(); - protected final Map> workGraph = new HashMap>(); - protected final Map> invertedWorkGraph = new HashMap>(); + protected final Map> workGraph = + new HashMap>(); + protected final Map> invertedWorkGraph = + new HashMap>(); protected final Map, SparkEdgeProperty> edgeProperties = new HashMap, SparkEdgeProperty>(); private Map> requiredCounterPrefix; + private final Map cloneToWork; + public SparkWork(String name) { this.name = name + ":" + (++counter); + cloneToWork = new HashMap(); } @@ -305,20 +310,25 @@ public int compareTo(Dependency o) { @Explain(displayName = "Edges") public Map> getDependencyMap() { Map> result = new LinkedHashMap>(); - for (Map.Entry> entry: invertedWorkGraph.entrySet()) { - List dependencies = new LinkedList(); - for (BaseWork d: entry.getValue()) { - Dependency dependency = new Dependency(); - dependency.w = d; - dependency.prop = getEdgeProperty(d, entry.getKey()); - dependencies.add(dependency); - } - if (!dependencies.isEmpty()) { - Collections.sort(dependencies); - result.put(entry.getKey().getName(), dependencies); + for (BaseWork baseWork : getAllWork()) { + if (invertedWorkGraph.get(baseWork) != null && invertedWorkGraph.get(baseWork).size() > 0) { + List dependencies = new LinkedList(); + for (BaseWork d : invertedWorkGraph.get(baseWork)) { + Dependency dependency = new Dependency(); + dependency.w = d; + dependency.prop = getEdgeProperty(d, baseWork); + dependencies.add(dependency); + } + if (!dependencies.isEmpty()) { + Collections.sort(dependencies); + result.put(baseWork.getName(), dependencies); + } } } return result; } + public Map getCloneToWork() { + return cloneToWork; + } } diff --git ql/src/test/results/clientpositive/spark/auto_join13.q.out ql/src/test/results/clientpositive/spark/auto_join13.q.out index a7f03e1..55f9c59 100644 --- ql/src/test/results/clientpositive/spark/auto_join13.q.out +++ ql/src/test/results/clientpositive/spark/auto_join13.q.out @@ -68,23 +68,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map 6 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key < 200) and UDFToDouble(key) is not null) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -104,6 +87,23 @@ STAGE PLANS: Map-reduce partition columns: (_col0 + _col2) (type: double) Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col3 (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 200) and UDFToDouble(key) is not null) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Join Operator diff --git ql/src/test/results/clientpositive/spark/auto_join18.q.out ql/src/test/results/clientpositive/spark/auto_join18.q.out index 2d347a3..7672c0d 100644 --- ql/src/test/results/clientpositive/spark/auto_join18.q.out +++ ql/src/test/results/clientpositive/spark/auto_join18.q.out @@ -33,9 +33,9 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 6 <- Map 5 (GROUP, 1) Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Reducer 3 (GROUP, 1) - Reducer 6 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -58,6 +58,24 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Map 5 Map Operator Tree: TableScan @@ -79,23 +97,23 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 2 + Reducer 6 Reduce Operator Tree: Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0) + aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: @@ -138,24 +156,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/auto_join18_multi_distinct.q.out ql/src/test/results/clientpositive/spark/auto_join18_multi_distinct.q.out index 1a6cc33..0c9c3bf 100644 --- ql/src/test/results/clientpositive/spark/auto_join18_multi_distinct.q.out +++ ql/src/test/results/clientpositive/spark/auto_join18_multi_distinct.q.out @@ -35,9 +35,9 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 6 <- Map 5 (GROUP, 1) Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Reducer 3 (GROUP, 1) - Reducer 6 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -60,6 +60,24 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) Map 5 Map Operator Tree: TableScan @@ -81,24 +99,24 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 2 + Reducer 6 Reduce Operator Tree: Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0) + aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: bigint) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Join Operator @@ -140,24 +158,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/auto_join2.q.out ql/src/test/results/clientpositive/spark/auto_join2.q.out index 72bd25e..f45554d 100644 --- ql/src/test/results/clientpositive/spark/auto_join2.q.out +++ ql/src/test/results/clientpositive/spark/auto_join2.q.out @@ -41,20 +41,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 4 - Map Operator Tree: - TableScan - alias: src3 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(key) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(key) (type: double) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) Map 5 Map Operator Tree: TableScan @@ -87,6 +73,20 @@ STAGE PLANS: Map-reduce partition columns: (_col0 + _col5) (type: double) Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Reducer 3 Reduce Operator Tree: Join Operator diff --git ql/src/test/results/clientpositive/spark/auto_join22.q.out ql/src/test/results/clientpositive/spark/auto_join22.q.out index c6729c6..43f6805 100644 --- ql/src/test/results/clientpositive/spark/auto_join22.q.out +++ ql/src/test/results/clientpositive/spark/auto_join22.q.out @@ -44,19 +44,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) - Map 6 - Map Operator Tree: - TableScan - alias: src4 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -77,6 +64,19 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: src4 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Join Operator diff --git ql/src/test/results/clientpositive/spark/auto_join27.q.out ql/src/test/results/clientpositive/spark/auto_join27.q.out index ac8cc21..f0b7784 100644 --- ql/src/test/results/clientpositive/spark/auto_join27.q.out +++ ql/src/test/results/clientpositive/spark/auto_join27.q.out @@ -31,9 +31,9 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 1) + Union 3 <- Map 6 (NONE, 0), Reducer 2 (NONE, 0) Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 1), Union 3 (PARTITION-LEVEL SORT, 1) Reducer 5 <- Reducer 4 (GROUP, 1) - Union 3 <- Map 6 (NONE, 0), Reducer 2 (NONE, 0) #### A masked pattern was here #### Vertices: Map 1 @@ -58,6 +58,19 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Map 6 Map Operator Tree: TableScan @@ -71,6 +84,8 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) + Union 3 + Vertex: Union 3 Map 7 Map Operator Tree: TableScan @@ -88,19 +103,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) Reducer 4 Reduce Operator Tree: Join Operator @@ -139,8 +141,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 3 - Vertex: Union 3 Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/auto_join30.q.out ql/src/test/results/clientpositive/spark/auto_join30.q.out index 6d44aec..369de7c 100644 --- ql/src/test/results/clientpositive/spark/auto_join30.q.out +++ ql/src/test/results/clientpositive/spark/auto_join30.q.out @@ -23,9 +23,9 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (SORT, 1) + Reducer 6 <- Map 5 (SORT, 1) Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Reducer 3 (GROUP, 1) - Reducer 6 <- Map 5 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -44,6 +44,17 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map 5 Map Operator Tree: TableScan @@ -61,17 +72,18 @@ STAGE PLANS: sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 2 + Reducer 6 Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reducer 3 Reduce Operator Tree: Join Operator @@ -113,18 +125,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Stage: Stage-0 Fetch Operator @@ -176,9 +176,9 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (SORT, 1) + Reducer 6 <- Map 5 (SORT, 1) Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Reducer 3 (GROUP, 1) - Reducer 6 <- Map 5 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -194,6 +194,17 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map 5 Map Operator Tree: TableScan @@ -208,17 +219,18 @@ STAGE PLANS: sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 2 + Reducer 6 Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reducer 3 Reduce Operator Tree: Join Operator @@ -260,18 +272,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Stage: Stage-0 Fetch Operator @@ -323,9 +323,9 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (SORT, 1) + Reducer 6 <- Map 5 (SORT, 1) Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Reducer 3 (GROUP, 1) - Reducer 6 <- Map 5 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -341,6 +341,17 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map 5 Map Operator Tree: TableScan @@ -355,17 +366,18 @@ STAGE PLANS: sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 2 + Reducer 6 Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reducer 3 Reduce Operator Tree: Join Operator @@ -407,18 +419,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Stage: Stage-0 Fetch Operator @@ -476,10 +476,10 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1), Reducer 8 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) Reducer 6 <- Map 5 (SORT, 1) Reducer 8 <- Map 7 (SORT, 1) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1), Reducer 8 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -499,6 +499,17 @@ STAGE PLANS: sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map 5 Map Operator Tree: TableScan @@ -515,6 +526,17 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reducer 6 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map 7 Map Operator Tree: TableScan @@ -532,17 +554,18 @@ STAGE PLANS: sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 2 + Reducer 8 Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reducer 3 Reduce Operator Tree: Join Operator @@ -586,29 +609,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reducer 8 - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Stage: Stage-0 Fetch Operator @@ -672,10 +672,10 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1), Reducer 8 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) Reducer 6 <- Map 5 (SORT, 1) Reducer 8 <- Map 7 (SORT, 1) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1), Reducer 8 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -692,6 +692,17 @@ STAGE PLANS: sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map 5 Map Operator Tree: TableScan @@ -705,6 +716,17 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reducer 6 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map 7 Map Operator Tree: TableScan @@ -719,17 +741,18 @@ STAGE PLANS: sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 2 + Reducer 8 Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reducer 3 Reduce Operator Tree: Join Operator @@ -773,29 +796,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 8 - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Stage: Stage-0 Fetch Operator @@ -859,10 +859,10 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1), Reducer 8 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) Reducer 6 <- Map 5 (SORT, 1) Reducer 8 <- Map 7 (SORT, 1) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1), Reducer 8 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -879,6 +879,17 @@ STAGE PLANS: sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map 5 Map Operator Tree: TableScan @@ -892,6 +903,17 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reducer 6 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map 7 Map Operator Tree: TableScan @@ -906,17 +928,18 @@ STAGE PLANS: sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 2 + Reducer 8 Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reducer 3 Reduce Operator Tree: Join Operator @@ -960,29 +983,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 8 - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Stage: Stage-0 Fetch Operator @@ -1046,10 +1046,10 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1), Reducer 8 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) Reducer 6 <- Map 5 (SORT, 1) Reducer 8 <- Map 7 (SORT, 1) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1), Reducer 8 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1066,6 +1066,17 @@ STAGE PLANS: sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map 5 Map Operator Tree: TableScan @@ -1079,6 +1090,17 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reducer 6 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map 7 Map Operator Tree: TableScan @@ -1093,17 +1115,18 @@ STAGE PLANS: sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 2 + Reducer 8 Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reducer 3 Reduce Operator Tree: Join Operator @@ -1147,29 +1170,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 8 - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Stage: Stage-0 Fetch Operator @@ -1233,10 +1233,10 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1), Reducer 8 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) Reducer 6 <- Map 5 (SORT, 1) Reducer 8 <- Map 7 (SORT, 1) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1), Reducer 8 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1253,6 +1253,17 @@ STAGE PLANS: sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map 5 Map Operator Tree: TableScan @@ -1266,6 +1277,17 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reducer 6 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map 7 Map Operator Tree: TableScan @@ -1280,17 +1302,18 @@ STAGE PLANS: sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 2 + Reducer 8 Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reducer 3 Reduce Operator Tree: Join Operator @@ -1334,29 +1357,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 8 - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/auto_join31.q.out ql/src/test/results/clientpositive/spark/auto_join31.q.out index 57a7b8f..aeb18de 100644 --- ql/src/test/results/clientpositive/spark/auto_join31.q.out +++ ql/src/test/results/clientpositive/spark/auto_join31.q.out @@ -29,10 +29,10 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1), Reducer 8 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 3 (GROUP, 1) Reducer 6 <- Map 5 (SORT, 1) Reducer 8 <- Map 7 (SORT, 1) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1), Reducer 8 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -49,6 +49,17 @@ STAGE PLANS: sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map 5 Map Operator Tree: TableScan @@ -62,6 +73,17 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reducer 6 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map 7 Map Operator Tree: TableScan @@ -76,17 +98,18 @@ STAGE PLANS: sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 2 + Reducer 8 Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reducer 3 Reduce Operator Tree: Join Operator @@ -130,29 +153,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 8 - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/auto_join_reordering_values.q.out ql/src/test/results/clientpositive/spark/auto_join_reordering_values.q.out index 6f9e51a..d682dd4 100644 --- ql/src/test/results/clientpositive/spark/auto_join_reordering_values.q.out +++ ql/src/test/results/clientpositive/spark/auto_join_reordering_values.q.out @@ -247,22 +247,23 @@ STAGE PLANS: name: default.orderpayment_small Truncated Path -> Alias: /orderpayment_small [dim_pay_date] - Map 6 + Map 8 Map Operator Tree: TableScan - alias: deal + alias: orderpayment Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: dealid is not null (type: boolean) + predicate: (((date is not null and dealid is not null) and cityid is not null) and userid is not null) (type: boolean) Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: dealid (type: int) + key expressions: date (type: string) sort order: + - Map-reduce partition columns: dealid (type: int) + Map-reduce partition columns: date (type: string) Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE - tag: 1 + tag: 0 + value expressions: dealid (type: int), cityid (type: int), userid (type: int) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -312,21 +313,40 @@ STAGE PLANS: name: default.orderpayment_small name: default.orderpayment_small Truncated Path -> Alias: - /orderpayment_small [deal] - Map 7 + /orderpayment_small [orderpayment] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col2} {VALUE._col3} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col3, _col4, _col9 + Statistics: Num rows: 1 Data size: 39 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 39 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col3 (type: int), _col4 (type: int), _col9 (type: string) + auto parallelism: false + Map 6 Map Operator Tree: TableScan - alias: order_city + alias: deal Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: cityid is not null (type: boolean) + predicate: dealid is not null (type: boolean) Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: cityid (type: int) + key expressions: dealid (type: int) sort order: + - Map-reduce partition columns: cityid (type: int) + Map-reduce partition columns: dealid (type: int) Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE tag: 1 auto parallelism: false @@ -378,24 +398,42 @@ STAGE PLANS: name: default.orderpayment_small name: default.orderpayment_small Truncated Path -> Alias: - /orderpayment_small [order_city] - Map 8 + /orderpayment_small [deal] + Reducer 3 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col2} {VALUE._col3} {VALUE._col8} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col3, _col4, _col9, _col16 + Statistics: Num rows: 1 Data size: 42 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 1 Data size: 42 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col4 (type: int), _col9 (type: string), _col16 (type: int) + auto parallelism: false + Map 7 Map Operator Tree: TableScan - alias: orderpayment + alias: order_city Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (((date is not null and dealid is not null) and cityid is not null) and userid is not null) (type: boolean) + predicate: cityid is not null (type: boolean) Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: date (type: string) + key expressions: cityid (type: int) sort order: + - Map-reduce partition columns: date (type: string) + Map-reduce partition columns: cityid (type: int) Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: dealid (type: int), cityid (type: int), userid (type: int) + tag: 1 auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -445,7 +483,26 @@ STAGE PLANS: name: default.orderpayment_small name: default.orderpayment_small Truncated Path -> Alias: - /orderpayment_small [orderpayment] + /orderpayment_small [order_city] + Reducer 4 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col3} {VALUE._col8} {VALUE._col15} + 1 + outputColumnNames: _col4, _col9, _col16 + Statistics: Num rows: 1 Data size: 46 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col4 (type: int) + sort order: + + Map-reduce partition columns: _col4 (type: int) + Statistics: Num rows: 1 Data size: 46 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col9 (type: string), _col16 (type: int) + auto parallelism: false Map 9 Map Operator Tree: TableScan @@ -512,63 +569,6 @@ STAGE PLANS: name: default.user_small Truncated Path -> Alias: /user_small [user] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col2} {VALUE._col3} - 1 {KEY.reducesinkkey0} - outputColumnNames: _col0, _col3, _col4, _col9 - Statistics: Num rows: 1 Data size: 39 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 39 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col3 (type: int), _col4 (type: int), _col9 (type: string) - auto parallelism: false - Reducer 3 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col2} {VALUE._col3} {VALUE._col8} - 1 {KEY.reducesinkkey0} - outputColumnNames: _col3, _col4, _col9, _col16 - Statistics: Num rows: 1 Data size: 42 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: int) - sort order: + - Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 1 Data size: 42 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col4 (type: int), _col9 (type: string), _col16 (type: int) - auto parallelism: false - Reducer 4 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col3} {VALUE._col8} {VALUE._col15} - 1 - outputColumnNames: _col4, _col9, _col16 - Statistics: Num rows: 1 Data size: 46 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col4 (type: int) - sort order: + - Map-reduce partition columns: _col4 (type: int) - Statistics: Num rows: 1 Data size: 46 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col9 (type: string), _col16 (type: int) - auto parallelism: false Reducer 5 Needs Tagging: true Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/spark/auto_join_without_localtask.q.out ql/src/test/results/clientpositive/spark/auto_join_without_localtask.q.out index f5e8f88..d9f8bd8 100644 --- ql/src/test/results/clientpositive/spark/auto_join_without_localtask.q.out +++ ql/src/test/results/clientpositive/spark/auto_join_without_localtask.q.out @@ -153,19 +153,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 4 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map 5 Map Operator Tree: TableScan @@ -196,6 +183,19 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Join Operator @@ -307,19 +307,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map 4 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map 5 Map Operator Tree: TableScan @@ -350,6 +337,19 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Join Operator diff --git ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out index d961067..96f75cf 100644 --- ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out +++ ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out @@ -301,8 +301,8 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) Reducer 5 <- Map 4 (GROUP, 1) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -336,6 +336,21 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) value expressions: _col1 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Map 4 Map Operator Tree: TableScan @@ -367,7 +382,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) value expressions: _col1 (type: bigint) - Reducer 2 + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -400,21 +415,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out index 8625f07..bbdb362 100644 --- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out @@ -72,9 +72,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: + Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0) Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1), Union 2 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Reducer 3 (GROUP, 1) - Union 2 <- Map 1 (NONE, 0), Map 5 (NONE, 0) #### A masked pattern was here #### Vertices: Map 1 @@ -109,6 +109,8 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Union 2 + Vertex: Union 2 Map 6 Map Operator Tree: TableScan @@ -164,8 +166,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 2 - Vertex: Union 2 Stage: Stage-0 Fetch Operator @@ -256,23 +256,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Map 5 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Group By Operator @@ -290,6 +273,23 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Join Operator diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out index 03ffb12..013f332 100644 --- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out @@ -210,9 +210,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -538,6 +538,23 @@ STAGE PLANS: name: default.bucket_small Truncated Path -> Alias: /bucket_small/ds=2008-04-08 [a] + Reducer 5 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 + 1 + 2 + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false Reducer 2 Needs Tagging: true Reduce Operator Tree: @@ -594,23 +611,6 @@ STAGE PLANS: TotalFiles: 1 GatherStats: false MultiFileSpray: false - Reducer 5 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 - 1 - 2 - Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 127 Data size: 12786 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out index 063590f..9faec43 100644 --- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out @@ -406,8 +406,8 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) Reducer 5 <- Map 4 (GROUP, 1) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -441,6 +441,21 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) value expressions: _col1 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Map 4 Map Operator Tree: TableScan @@ -472,7 +487,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) value expressions: _col1 (type: bigint) - Reducer 2 + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -505,21 +520,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator @@ -1946,8 +1946,8 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) Reducer 5 <- Map 4 (GROUP, 1) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1981,6 +1981,21 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) value expressions: _col1 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) Map 4 Map Operator Tree: TableScan @@ -2012,7 +2027,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) value expressions: _col1 (type: bigint) - Reducer 2 + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -2045,21 +2060,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out index 20b5adc..ef72223 100644 --- ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out +++ ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out @@ -205,8 +205,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 3 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -247,6 +247,24 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: double), _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double) Reducer 2 Reduce Operator Tree: Join Operator @@ -268,24 +286,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: double), _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: double) Stage: Stage-0 Fetch Operator @@ -311,9 +311,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1) Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -357,27 +357,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: int) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {KEY.reducesinkkey0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Reduce Operator Tree: Join Operator @@ -422,6 +401,27 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: double) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -447,9 +447,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1) Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -493,27 +493,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: int) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {KEY.reducesinkkey0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Reduce Operator Tree: Join Operator @@ -558,6 +537,27 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 66 Data size: 700 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: double) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {KEY.reducesinkkey0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -673,8 +673,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -718,6 +718,26 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: int) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: Join Operator @@ -739,26 +759,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Stage: Stage-0 Fetch Operator @@ -786,8 +786,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 3 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -828,6 +828,24 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) Reducer 2 Reduce Operator Tree: Join Operator @@ -849,24 +867,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) Stage: Stage-0 Fetch Operator @@ -892,8 +892,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 3 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -928,6 +928,24 @@ STAGE PLANS: Map-reduce partition columns: key (type: int) Statistics: Num rows: 14 Data size: 1456 Basic stats: COMPLETE Column stats: NONE value expressions: substr(value, 5) (type: string) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) Reducer 2 Reduce Operator Tree: Join Operator @@ -949,24 +967,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: int) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) Stage: Stage-0 Fetch Operator @@ -1159,8 +1159,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1204,6 +1204,22 @@ STAGE PLANS: Map-reduce partition columns: key (type: int) Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Reducer 2 Reduce Operator Tree: Join Operator @@ -1225,22 +1241,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out index e7ec4c8..aa508d2 100644 --- ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out +++ ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out @@ -120,8 +120,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -165,6 +165,22 @@ STAGE PLANS: Map-reduce partition columns: key (type: int) Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Reducer 2 Reduce Operator Tree: Join Operator @@ -186,22 +202,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) Stage: Stage-0 Fetch Operator @@ -555,8 +555,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 3 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -594,6 +594,22 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Reducer 4 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 60 Data size: 636 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 60 Data size: 636 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 60 Data size: 636 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -615,22 +631,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 60 Data size: 636 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 60 Data size: 636 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 60 Data size: 636 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -650,8 +650,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 3 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -690,6 +690,23 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + Reducer 4 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 60 Data size: 636 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 60 Data size: 636 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 60 Data size: 636 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Reducer 2 Reduce Operator Tree: Join Operator @@ -711,23 +728,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 60 Data size: 636 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 60 Data size: 636 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 60 Data size: 636 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/column_access_stats.q.out ql/src/test/results/clientpositive/spark/column_access_stats.q.out index 8c281be..9875ce2 100644 --- ql/src/test/results/clientpositive/spark/column_access_stats.q.out +++ ql/src/test/results/clientpositive/spark/column_access_stats.q.out @@ -704,8 +704,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -756,6 +756,25 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -777,25 +796,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/groupby10.q.out ql/src/test/results/clientpositive/spark/groupby10.q.out index 561f1dd..d51088b 100644 --- ql/src/test/results/clientpositive/spark/groupby10.q.out +++ ql/src/test/results/clientpositive/spark/groupby10.q.out @@ -56,9 +56,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 4 <- Reducer 2 (GROUP, 1) + Reducer 6 <- Map 1 (SORT, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (SORT, 1) + Reducer 3 <- Reducer 5 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -72,23 +73,11 @@ STAGE PLANS: Map-reduce partition columns: substr(value, 5) (type: string) Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: int) - Reducer 2 + Reducer 6 Reduce Operator Tree: Forward Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(KEY._col0), count(DISTINCT KEY._col0) - keys: VALUE._col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: bigint) - Group By Operator aggregations: sum(KEY._col0), sum(DISTINCT KEY._col0) keys: VALUE._col0 (type: int) mode: hash @@ -100,10 +89,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double), _col2 (type: double) - Reducer 3 + Reducer 4 Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0), count(VALUE._col1) + aggregations: sum(VALUE._col0), sum(VALUE._col1) keys: KEY._col0 (type: int) mode: final outputColumnNames: _col0, _col1, _col2 @@ -119,11 +108,27 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - Reducer 4 + name: default.dest2 + Reducer 5 + Reduce Operator Tree: + Forward + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(KEY._col0), count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 3 Reduce Operator Tree: Group By Operator - aggregations: sum(VALUE._col0), sum(VALUE._col1) + aggregations: count(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: int) mode: final outputColumnNames: _col0, _col1, _col2 @@ -139,7 +144,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 + name: default.dest1 Stage: Stage-3 Dependency Collection @@ -268,9 +273,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 4 <- Reducer 2 (GROUP, 1) + Reducer 5 <- Map 1 (SORT, 1) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 6 <- Map 1 (SORT, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -284,7 +290,7 @@ STAGE PLANS: Map-reduce partition columns: substr(value, 5) (type: string) Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: int) - Reducer 2 + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE @@ -300,18 +306,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint) - Group By Operator - aggregations: sum(KEY._col0), sum(DISTINCT KEY._col0) - keys: VALUE._col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double) Reducer 3 Reduce Operator Tree: Group By Operator @@ -332,6 +326,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(KEY._col0), sum(DISTINCT KEY._col0) + keys: VALUE._col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: double) Reducer 4 Reduce Operator Tree: Group By Operator @@ -482,9 +492,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 4 <- Reducer 2 (GROUP, 1) + Reducer 6 <- Map 1 (SORT, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (SORT, 1) + Reducer 3 <- Reducer 5 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -498,23 +509,11 @@ STAGE PLANS: Map-reduce partition columns: substr(value, 5) (type: string) Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: int) - Reducer 2 + Reducer 6 Reduce Operator Tree: Forward Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(DISTINCT KEY._col0), count(DISTINCT KEY._col0) - keys: VALUE._col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: bigint) - Group By Operator aggregations: sum(DISTINCT KEY._col0), avg(DISTINCT KEY._col0) keys: VALUE._col0 (type: int) mode: hash @@ -526,10 +525,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double), _col2 (type: struct) - Reducer 3 + Reducer 4 Reduce Operator Tree: Group By Operator - aggregations: sum(VALUE._col0), count(VALUE._col1) + aggregations: sum(VALUE._col0), avg(VALUE._col1) keys: KEY._col0 (type: int) mode: final outputColumnNames: _col0, _col1, _col2 @@ -545,11 +544,27 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - Reducer 4 + name: default.dest2 + Reducer 5 + Reduce Operator Tree: + Forward + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(DISTINCT KEY._col0), count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: bigint) + Reducer 3 Reduce Operator Tree: Group By Operator - aggregations: sum(VALUE._col0), avg(VALUE._col1) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: int) mode: final outputColumnNames: _col0, _col1, _col2 @@ -565,7 +580,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 + name: default.dest1 Stage: Stage-3 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/groupby11.q.out ql/src/test/results/clientpositive/spark/groupby11.q.out index 41967e5..b23b3f2 100644 --- ql/src/test/results/clientpositive/spark/groupby11.q.out +++ ql/src/test/results/clientpositive/spark/groupby11.q.out @@ -44,9 +44,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 4 <- Reducer 2 (GROUP, 1) + Reducer 5 <- Map 1 (SORT, 1) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 6 <- Map 1 (SORT, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -60,7 +61,7 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string), substr(value, 5) (type: string) - Reducer 2 + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -76,18 +77,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint) - Group By Operator - aggregations: count(KEY._col0), count(DISTINCT KEY._col0) - keys: VALUE._col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -108,6 +97,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(KEY._col0), count(DISTINCT KEY._col0) + keys: VALUE._col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 4 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/groupby7_map.q.out ql/src/test/results/clientpositive/spark/groupby7_map.q.out index 9a3460f..760dcef 100644 --- ql/src/test/results/clientpositive/spark/groupby7_map.q.out +++ ql/src/test/results/clientpositive/spark/groupby7_map.q.out @@ -40,11 +40,11 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 31) - Reducer 3 <- Map 1 (GROUP, 31) + Reducer 3 <- Map 5 (GROUP, 31) + Reducer 2 <- Map 4 (GROUP, 31) #### A masked pattern was here #### Vertices: - Map 1 + Map 5 Map Operator Tree: TableScan alias: src @@ -65,23 +65,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double) - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(substr(value, 5)) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) - Reducer 2 + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -100,8 +84,29 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - Reducer 3 + name: default.dest2 + Map 4 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(substr(value, 5)) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -120,7 +125,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 + name: default.dest1 Stage: Stage-3 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/groupby7_map_skew.q.out ql/src/test/results/clientpositive/spark/groupby7_map_skew.q.out index 2e43287..c324c66 100644 --- ql/src/test/results/clientpositive/spark/groupby7_map_skew.q.out +++ ql/src/test/results/clientpositive/spark/groupby7_map_skew.q.out @@ -40,13 +40,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 31) Reducer 3 <- Reducer 2 (GROUP, 31) - Reducer 4 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 31) Reducer 5 <- Reducer 4 (GROUP, 31) #### A masked pattern was here #### Vertices: - Map 1 + Map 6 Map Operator Tree: TableScan alias: src @@ -67,22 +67,6 @@ STAGE PLANS: Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double) - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(substr(value, 5)) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) Reducer 2 Reduce Operator Tree: Group By Operator @@ -117,6 +101,27 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Map 7 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(substr(value, 5)) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) Reducer 4 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/groupby7_noskew.q.out ql/src/test/results/clientpositive/spark/groupby7_noskew.q.out index ccfdf39..e4e0613 100644 --- ql/src/test/results/clientpositive/spark/groupby7_noskew.q.out +++ ql/src/test/results/clientpositive/spark/groupby7_noskew.q.out @@ -40,11 +40,11 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 31) - Reducer 3 <- Map 1 (GROUP, 31) + Reducer 2 <- Map 4 (GROUP, 31) + Reducer 3 <- Map 5 (GROUP, 31) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: src @@ -59,16 +59,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: substr(value, 5) (type: string) - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: substr(value, 5) (type: string) Reducer 2 Reduce Operator Tree: Group By Operator @@ -89,6 +79,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: substr(value, 5) (type: string) Reducer 3 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/groupby7_noskew_multi_single_reducer.q.out ql/src/test/results/clientpositive/spark/groupby7_noskew_multi_single_reducer.q.out index 396c6a4..e433aeb 100644 --- ql/src/test/results/clientpositive/spark/groupby7_noskew_multi_single_reducer.q.out +++ ql/src/test/results/clientpositive/spark/groupby7_noskew_multi_single_reducer.q.out @@ -40,9 +40,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (SORT, 31) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 4 <- Reducer 2 (GROUP, 1) + Reducer 5 <- Map 1 (SORT, 31) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 6 <- Map 1 (SORT, 31) + Reducer 4 <- Reducer 6 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -60,7 +61,7 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: substr(value, 5) (type: string) - Reducer 2 + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -81,23 +82,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: double) - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: double) Reducer 3 Reduce Operator Tree: Select Operator @@ -119,6 +103,27 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: double) Reducer 4 Reduce Operator Tree: Select Operator diff --git ql/src/test/results/clientpositive/spark/groupby8.q.out ql/src/test/results/clientpositive/spark/groupby8.q.out index d7c5b6d..505f6ac 100644 --- ql/src/test/results/clientpositive/spark/groupby8.q.out +++ ql/src/test/results/clientpositive/spark/groupby8.q.out @@ -40,9 +40,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 4 <- Reducer 2 (GROUP, 1) + Reducer 6 <- Map 1 (SORT, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (SORT, 1) + Reducer 3 <- Reducer 5 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -56,7 +57,7 @@ STAGE PLANS: Map-reduce partition columns: substr(value, 5) (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string) - Reducer 2 + Reducer 6 Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -72,19 +73,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Group By Operator - aggregations: count(DISTINCT KEY._col0) - keys: VALUE._col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reducer 3 + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -103,8 +92,24 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - Reducer 4 + name: default.dest2 + Reducer 5 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -123,7 +128,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 + name: default.dest1 Stage: Stage-3 Dependency Collection @@ -828,9 +833,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 4 <- Reducer 2 (GROUP, 1) + Reducer 5 <- Map 1 (SORT, 1) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 6 <- Map 1 (SORT, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -844,7 +850,7 @@ STAGE PLANS: Map-reduce partition columns: substr(value, 5) (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string) - Reducer 2 + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -860,18 +866,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Group By Operator - aggregations: count(DISTINCT KEY._col0) - keys: VALUE._col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -892,6 +886,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 4 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/groupby8_map.q.out ql/src/test/results/clientpositive/spark/groupby8_map.q.out index 9d491be..d6a1fd2 100644 --- ql/src/test/results/clientpositive/spark/groupby8_map.q.out +++ ql/src/test/results/clientpositive/spark/groupby8_map.q.out @@ -40,9 +40,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (SORT, 31) - Reducer 3 <- Reducer 2 (GROUP, 31) - Reducer 4 <- Reducer 2 (GROUP, 31) + Reducer 6 <- Map 1 (SORT, 31) + Reducer 4 <- Reducer 6 (GROUP, 31) + Reducer 5 <- Map 1 (SORT, 31) + Reducer 3 <- Reducer 5 (GROUP, 31) #### A masked pattern was here #### Vertices: Map 1 @@ -56,7 +57,7 @@ STAGE PLANS: Map-reduce partition columns: substr(value, 5) (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string) - Reducer 2 + Reducer 6 Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -72,19 +73,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Group By Operator - aggregations: count(DISTINCT KEY._col0) - keys: VALUE._col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reducer 3 + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -103,8 +92,24 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - Reducer 4 + name: default.dest2 + Reducer 5 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -123,7 +128,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 + name: default.dest1 Stage: Stage-3 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/groupby8_map_skew.q.out ql/src/test/results/clientpositive/spark/groupby8_map_skew.q.out index 9d491be..a16b44d 100644 --- ql/src/test/results/clientpositive/spark/groupby8_map_skew.q.out +++ ql/src/test/results/clientpositive/spark/groupby8_map_skew.q.out @@ -40,9 +40,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (SORT, 31) - Reducer 3 <- Reducer 2 (GROUP, 31) - Reducer 4 <- Reducer 2 (GROUP, 31) + Reducer 5 <- Map 1 (SORT, 31) + Reducer 3 <- Reducer 5 (GROUP, 31) + Reducer 6 <- Map 1 (SORT, 31) + Reducer 4 <- Reducer 6 (GROUP, 31) #### A masked pattern was here #### Vertices: Map 1 @@ -56,7 +57,7 @@ STAGE PLANS: Map-reduce partition columns: substr(value, 5) (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string) - Reducer 2 + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -72,18 +73,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Group By Operator - aggregations: count(DISTINCT KEY._col0) - keys: VALUE._col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -104,6 +93,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 4 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/groupby8_noskew.q.out ql/src/test/results/clientpositive/spark/groupby8_noskew.q.out index 9d491be..d6a1fd2 100644 --- ql/src/test/results/clientpositive/spark/groupby8_noskew.q.out +++ ql/src/test/results/clientpositive/spark/groupby8_noskew.q.out @@ -40,9 +40,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (SORT, 31) - Reducer 3 <- Reducer 2 (GROUP, 31) - Reducer 4 <- Reducer 2 (GROUP, 31) + Reducer 6 <- Map 1 (SORT, 31) + Reducer 4 <- Reducer 6 (GROUP, 31) + Reducer 5 <- Map 1 (SORT, 31) + Reducer 3 <- Reducer 5 (GROUP, 31) #### A masked pattern was here #### Vertices: Map 1 @@ -56,7 +57,7 @@ STAGE PLANS: Map-reduce partition columns: substr(value, 5) (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string) - Reducer 2 + Reducer 6 Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -72,19 +73,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Group By Operator - aggregations: count(DISTINCT KEY._col0) - keys: VALUE._col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reducer 3 + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -103,8 +92,24 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - Reducer 4 + name: default.dest2 + Reducer 5 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -123,7 +128,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 + name: default.dest1 Stage: Stage-3 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/groupby9.q.out ql/src/test/results/clientpositive/spark/groupby9.q.out index de795a0..8606866 100644 --- ql/src/test/results/clientpositive/spark/groupby9.q.out +++ ql/src/test/results/clientpositive/spark/groupby9.q.out @@ -40,9 +40,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 4 <- Reducer 2 (GROUP, 1) + Reducer 6 <- Map 1 (SORT, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (SORT, 1) + Reducer 3 <- Reducer 5 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -56,24 +57,12 @@ STAGE PLANS: Map-reduce partition columns: substr(value, 5) (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string), value (type: string) - Reducer 2 + Reducer 6 Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT KEY._col0) - keys: VALUE._col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Group By Operator - aggregations: count(DISTINCT KEY._col0) keys: VALUE._col0 (type: string), VALUE._col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -84,17 +73,17 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) - Reducer 3 + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: final - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -103,18 +92,34 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - Reducer 4 + name: default.dest2 + Reducer 5 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) + keys: KEY._col0 (type: string) mode: final - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2 + expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -123,7 +128,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 + name: default.dest1 Stage: Stage-3 Dependency Collection @@ -829,9 +834,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 4 <- Reducer 2 (GROUP, 1) + Reducer 5 <- Map 1 (SORT, 1) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 6 <- Map 1 (SORT, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -845,7 +851,7 @@ STAGE PLANS: Map-reduce partition columns: substr(value, 5) (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string), value (type: string) - Reducer 2 + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -861,18 +867,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Group By Operator - aggregations: count(DISTINCT KEY._col0) - keys: VALUE._col1 (type: string), VALUE._col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -893,6 +887,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col1 (type: string), VALUE._col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reducer 4 Reduce Operator Tree: Group By Operator @@ -1618,9 +1628,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 4 <- Reducer 2 (GROUP, 1) + Reducer 6 <- Map 1 (SORT, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (SORT, 1) + Reducer 3 <- Reducer 5 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1634,24 +1645,12 @@ STAGE PLANS: Map-reduce partition columns: substr(value, 5) (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string), value (type: string) - Reducer 2 + Reducer 6 Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT KEY._col0) - keys: VALUE._col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Group By Operator - aggregations: count(DISTINCT KEY._col0) keys: VALUE._col0 (type: string), VALUE._col1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -1662,17 +1661,17 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) - Reducer 3 + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: final - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -1681,18 +1680,34 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - Reducer 4 + name: default.dest2 + Reducer 5 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) + keys: KEY._col0 (type: string) mode: final - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2 + expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -1701,7 +1716,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 + name: default.dest1 Stage: Stage-3 Dependency Collection @@ -2407,11 +2422,11 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) + Reducer 2 <- Map 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 5 Map Operator Tree: TableScan alias: src @@ -2422,22 +2437,6 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(substr(value, 5)) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(substr(value, 5)) keys: key (type: string), value (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -2448,17 +2447,17 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) - Reducer 2 + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -2467,18 +2466,39 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - Reducer 3 + name: default.dest2 + Map 4 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(substr(value, 5)) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) + keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2 + expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -2487,7 +2507,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 + name: default.dest1 Stage: Stage-3 Dependency Collection @@ -3193,9 +3213,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 4 <- Reducer 2 (GROUP, 1) + Reducer 6 <- Map 1 (SORT, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (SORT, 1) + Reducer 3 <- Reducer 5 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3209,24 +3230,12 @@ STAGE PLANS: Map-reduce partition columns: substr(value, 5) (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string), value (type: string) - Reducer 2 + Reducer 6 Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT KEY._col0) - keys: VALUE._col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Group By Operator - aggregations: count(DISTINCT KEY._col0) keys: VALUE._col1 (type: string), VALUE._col0 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -3237,17 +3246,17 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) - Reducer 3 + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: final - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + expressions: UDFToInteger(_col1) (type: int), _col0 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -3256,18 +3265,34 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - Reducer 4 + name: default.dest2 + Reducer 5 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) + keys: KEY._col0 (type: string) mode: final - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(_col1) (type: int), _col0 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2 + expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -3276,7 +3301,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 + name: default.dest1 Stage: Stage-3 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/groupby_complex_types.q.out ql/src/test/results/clientpositive/spark/groupby_complex_types.q.out index 7585064..68dcd90 100644 --- ql/src/test/results/clientpositive/spark/groupby_complex_types.q.out +++ ql/src/test/results/clientpositive/spark/groupby_complex_types.q.out @@ -52,33 +52,17 @@ STAGE PLANS: Stage: Stage-3 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Map 1 (GROUP, 1) - Reducer 4 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 6 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP, 1) + Reducer 2 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 6 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: array(key) (type: array) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: array) - sort order: + - Map-reduce partition columns: _col0 (type: array) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Select Operator expressions: key (type: string), value (type: string) outputColumnNames: key, value Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -94,32 +78,16 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: map) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: struct(key,value) (type: struct) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: struct) - sort order: + - Map-reduce partition columns: _col0 (type: struct) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reducer 2 + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: array) + keys: KEY._col0 (type: map) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: array), _col1 (type: bigint) + expressions: _col0 (type: map), _col1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -129,17 +97,38 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - Reducer 3 + name: default.dest2 + Map 7 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: struct(key,value) (type: struct) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: struct) + sort order: + + Map-reduce partition columns: _col0 (type: struct) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: map) + keys: KEY._col0 (type: struct) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: map), _col1 (type: bigint) + expressions: _col0 (type: struct), _col1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -149,17 +138,38 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - Reducer 4 + name: default.dest3 + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: array(key) (type: array) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: array) + sort order: + + Map-reduce partition columns: _col0 (type: array) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: struct) + keys: KEY._col0 (type: array) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: struct), _col1 (type: bigint) + expressions: _col0 (type: array), _col1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -169,7 +179,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest3 + name: default.dest1 Stage: Stage-4 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/groupby_complex_types_multi_single_reducer.q.out ql/src/test/results/clientpositive/spark/groupby_complex_types_multi_single_reducer.q.out index 7e4fa35..3575eb6 100644 --- ql/src/test/results/clientpositive/spark/groupby_complex_types_multi_single_reducer.q.out +++ ql/src/test/results/clientpositive/spark/groupby_complex_types_multi_single_reducer.q.out @@ -40,34 +40,18 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 4 <- Map 1 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP, 1) Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 2 <- Map 6 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 7 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: array(key) (type: array) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: array) - sort order: + - Map-reduce partition columns: _col0 (type: array) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Select Operator expressions: key (type: string), value (type: string) outputColumnNames: key, value Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -83,16 +67,16 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: map) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 2 + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: array) + keys: KEY._col0 (type: map) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: array), _col1 (type: bigint) + expressions: _col0 (type: map), _col1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Limit @@ -101,11 +85,11 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: array), _col1 (type: bigint) - Reducer 3 + value expressions: _col0 (type: map), _col1 (type: bigint) + Reducer 5 Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: array), VALUE._col1 (type: bigint) + expressions: VALUE._col0 (type: map), VALUE._col1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE Limit @@ -118,17 +102,38 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - Reducer 4 + name: default.dest2 + Map 6 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: array(key) (type: array) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: array) + sort order: + + Map-reduce partition columns: _col0 (type: array) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: map) + keys: KEY._col0 (type: array) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: map), _col1 (type: bigint) + expressions: _col0 (type: array), _col1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Limit @@ -137,11 +142,11 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: map), _col1 (type: bigint) - Reducer 5 + value expressions: _col0 (type: array), _col1 (type: bigint) + Reducer 3 Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: map), VALUE._col1 (type: bigint) + expressions: VALUE._col0 (type: array), VALUE._col1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE Limit @@ -154,7 +159,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 + name: default.dest1 Stage: Stage-3 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/groupby_cube1.q.out ql/src/test/results/clientpositive/spark/groupby_cube1.q.out index 0d0f922..9a63692 100644 --- ql/src/test/results/clientpositive/spark/groupby_cube1.q.out +++ ql/src/test/results/clientpositive/spark/groupby_cube1.q.out @@ -405,13 +405,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 1) Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 4 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 1) Reducer 5 <- Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 6 Map Operator Tree: TableScan alias: t1 @@ -432,22 +432,6 @@ STAGE PLANS: Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col3 (type: bigint) - Select Operator - expressions: key (type: string), val (type: string) - outputColumnNames: key, val - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: sum(1) - keys: key (type: string), val (type: string), '0' (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col3 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -482,6 +466,27 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t2 + Map 7 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: key, val + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: sum(1) + keys: key (type: string), val (type: string), '0' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col3 (type: bigint) Reducer 4 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/groupby_multi_insert_common_distinct.q.out ql/src/test/results/clientpositive/spark/groupby_multi_insert_common_distinct.q.out index c1fd88d..bbc92b1 100644 --- ql/src/test/results/clientpositive/spark/groupby_multi_insert_common_distinct.q.out +++ ql/src/test/results/clientpositive/spark/groupby_multi_insert_common_distinct.q.out @@ -40,9 +40,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 4 <- Reducer 2 (GROUP, 1) + Reducer 6 <- Map 1 (SORT, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (SORT, 1) + Reducer 3 <- Reducer 5 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -56,24 +57,12 @@ STAGE PLANS: Map-reduce partition columns: value (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string), (key + key) (type: double) - Reducer 2 + Reducer 6 Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT KEY._col0) - keys: VALUE._col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Group By Operator - aggregations: count(DISTINCT KEY._col0) keys: VALUE._col1 (type: double) mode: hash outputColumnNames: _col0, _col1 @@ -84,11 +73,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 3 + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) + keys: KEY._col0 (type: double) mode: final outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE @@ -103,12 +92,28 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - Reducer 4 + name: default.dest2 + Reducer 5 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: double) + keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE @@ -123,7 +128,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 + name: default.dest1 Stage: Stage-3 Dependency Collection @@ -224,11 +229,11 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: src @@ -248,21 +253,6 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(DISTINCT value) - keys: (key + key) (type: double), value (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Group By Operator @@ -283,6 +273,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT value) + keys: (key + key) (type: double), value (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer.q.out ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer.q.out index 2dab62f..2c7030c 100644 --- ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer.q.out +++ ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer.q.out @@ -304,12 +304,12 @@ STAGE PLANS: Stage: Stage-5 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 1 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 2 <- Map 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 6 Map Operator Tree: TableScan alias: src @@ -319,19 +319,87 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string) - sort order: ++ - Map-reduce partition columns: substr(key, 1, 1) (type: string) + key expressions: substr(key, 1, 1) (type: string), substr(key, 2, 1) (type: string), substr(value, 5) (type: string) + sort order: +++ + Map-reduce partition columns: substr(key, 1, 1) (type: string), substr(key, 2, 1) (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) + Reducer 3 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col2:0._col0), sum(KEY._col2:0._col0), count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: bigint), concat(_col0, _col3) (type: string), _col3 (type: double), _col4 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: double), _col4 (type: bigint) + Filter Operator + predicate: (KEY._col0 >= 5) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col2:0._col0), sum(KEY._col2:0._col0), count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col2) (type: int), concat(_col0, _col3) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_h3 + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: bigint), VALUE._col2 (type: string), VALUE._col3 (type: double), VALUE._col4 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_h2 + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: key, value Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: substr(key, 1, 1) (type: string), substr(key, 2, 1) (type: string), substr(value, 5) (type: string) - sort order: +++ - Map-reduce partition columns: substr(key, 1, 1) (type: string), substr(key, 2, 1) (type: string) + key expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string) + sort order: ++ + Map-reduce partition columns: substr(key, 1, 1) (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) Reducer 2 @@ -398,69 +466,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g4 - Reducer 3 - Reduce Operator Tree: - Forward - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(DISTINCT KEY._col2:0._col0), sum(KEY._col2:0._col0), count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: bigint), concat(_col0, _col3) (type: string), _col3 (type: double), _col4 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: double), _col4 (type: bigint) - Filter Operator - predicate: (KEY._col0 >= 5) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(DISTINCT KEY._col2:0._col0), sum(KEY._col2:0._col0), count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), UDFToInteger(_col2) (type: int), concat(_col0, _col3) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_h3 - Reducer 4 - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: bigint), VALUE._col2 (type: string), VALUE._col3 (type: double), VALUE._col4 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_h2 Stage: Stage-6 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/groupby_position.q.out ql/src/test/results/clientpositive/spark/groupby_position.q.out index 697184b..937c7df 100644 --- ql/src/test/results/clientpositive/spark/groupby_position.q.out +++ ql/src/test/results/clientpositive/spark/groupby_position.q.out @@ -40,11 +40,11 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 5 Map Operator Tree: TableScan alias: src @@ -58,15 +58,40 @@ STAGE PLANS: Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT substr(value, 5)) - keys: key (type: string), substr(value, 5) (type: string) + keys: key (type: string), value (type: string), substr(value, 5) (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col2:0._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.testtable2 + Map 4 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 20) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -76,14 +101,14 @@ STAGE PLANS: Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(DISTINCT substr(value, 5)) - keys: key (type: string), value (type: string), substr(value, 5) (type: string) + keys: key (type: string), substr(value, 5) (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: @@ -105,26 +130,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.testtable1 - Reducer 3 - Reduce Operator Tree: - Group By Operator - aggregations: count(DISTINCT KEY._col2:0._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.testtable2 Stage: Stage-3 Dependency Collection @@ -238,11 +243,11 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 5 Map Operator Tree: TableScan alias: src @@ -251,24 +256,6 @@ STAGE PLANS: predicate: (key < 20) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(DISTINCT substr(value, 5)) - keys: key (type: string), substr(value, 5) (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 20) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator expressions: value (type: string), key (type: string) outputColumnNames: value, key Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -283,17 +270,17 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 3 Reduce Operator Tree: Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0) - keys: KEY._col0 (type: string) + aggregations: count(DISTINCT KEY._col2:0._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + expressions: UDFToInteger(_col1) (type: int), _col0 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -302,18 +289,41 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.testtable1 - Reducer 3 + name: default.testtable2 + Map 4 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 20) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT substr(value, 5)) + keys: key (type: string), substr(value, 5) (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reducer 2 Reduce Operator Tree: Group By Operator - aggregations: count(DISTINCT KEY._col2:0._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(_col1) (type: int), _col0 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2 + expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -322,7 +332,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.testtable2 + name: default.testtable1 Stage: Stage-3 Dependency Collection @@ -567,9 +577,9 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 6 <- Map 5 (GROUP, 1) Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Reducer 3 (SORT, 1) - Reducer 6 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -595,6 +605,24 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col2:0._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Map 5 Map Operator Tree: TableScan @@ -617,13 +645,12 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 28 Data size: 297 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 6 Reduce Operator Tree: Group By Operator - aggregations: count(DISTINCT KEY._col2:0._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1 Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string) @@ -666,23 +693,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14 Data size: 148 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out index eb87c7e..283d543 100644 --- ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out +++ ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out @@ -393,13 +393,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 1) Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 4 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 1) Reducer 5 <- Reducer 4 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 6 Map Operator Tree: TableScan alias: t1 @@ -420,22 +420,6 @@ STAGE PLANS: Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col3 (type: bigint) - Select Operator - expressions: key (type: string), val (type: string) - outputColumnNames: key, val - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: sum(1) - keys: key (type: string), val (type: string), '0' (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col3 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -470,6 +454,27 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t2 + Map 7 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: key, val + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: sum(1) + keys: key (type: string), val (type: string), '0' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col3 (type: bigint) Reducer 4 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out index 2cbe0d1..6fa433c 100644 --- ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out +++ ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out @@ -3156,8 +3156,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 3 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3326,6 +3326,27 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string), _col2 (type: bigint) + auto parallelism: false Reducer 2 Needs Tagging: true Reduce Operator Tree: @@ -3362,27 +3383,6 @@ STAGE PLANS: TotalFiles: 1 GatherStats: false MultiFileSpray: false - Reducer 4 - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col1 (type: string), _col2 (type: bigint) - auto parallelism: false Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out index c599f4c..a73f366 100644 --- ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out +++ ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out @@ -3246,9 +3246,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 1) Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3417,42 +3417,6 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string), _col4 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0,_col1,_col2,_col3,_col4 - columns.types string:bigint:string:string:bigint - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Reducer 4 Needs Tagging: false Reduce Operator Tree: @@ -3491,6 +3455,42 @@ STAGE PLANS: tag: 1 value expressions: _col1 (type: string), _col2 (type: bigint) auto parallelism: false + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string), _col4 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types string:bigint:string:string:bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/input1_limit.q.out ql/src/test/results/clientpositive/spark/input1_limit.q.out index e9f8d5e..e6f7ac6 100644 --- ql/src/test/results/clientpositive/spark/input1_limit.q.out +++ ql/src/test/results/clientpositive/spark/input1_limit.q.out @@ -40,11 +40,11 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 4 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: src @@ -63,20 +63,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string) - Filter Operator - predicate: (key < 100) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 5 - Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) Reducer 2 Reduce Operator Tree: Select Operator @@ -98,6 +84,25 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 100) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reducer 3 Reduce Operator Tree: Select Operator diff --git ql/src/test/results/clientpositive/spark/insert_into3.q.out ql/src/test/results/clientpositive/spark/insert_into3.q.out index 1313d26..17ef92c 100644 --- ql/src/test/results/clientpositive/spark/insert_into3.q.out +++ ql/src/test/results/clientpositive/spark/insert_into3.q.out @@ -40,11 +40,11 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Map 1 (SORT, 1) + Reducer 3 <- Map 5 (SORT, 1) + Reducer 2 <- Map 4 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 5 Map Operator Tree: TableScan alias: src @@ -57,56 +57,61 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Limit - Number of rows: 50 - Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + Number of rows: 100 + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.insert_into3a - Reducer 3 + name: default.insert_into3b + Map 4 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reducer 2 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Limit - Number of rows: 100 - Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Number of rows: 50 + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.insert_into3b + name: default.insert_into3a Stage: Stage-3 Dependency Collection @@ -197,11 +202,11 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 4 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: src @@ -217,17 +222,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string) - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) Reducer 2 Reduce Operator Tree: Select Operator @@ -249,6 +243,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3a + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reducer 3 Reduce Operator Tree: Select Operator diff --git ql/src/test/results/clientpositive/spark/join13.q.out ql/src/test/results/clientpositive/spark/join13.q.out index 1f3374d..fc2e01e 100644 --- ql/src/test/results/clientpositive/spark/join13.q.out +++ ql/src/test/results/clientpositive/spark/join13.q.out @@ -67,23 +67,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map 5 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key < 200) and UDFToDouble(key) is not null) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -103,6 +86,23 @@ STAGE PLANS: Map-reduce partition columns: (_col0 + _col2) (type: double) Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col3 (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 200) and UDFToDouble(key) is not null) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Join Operator diff --git ql/src/test/results/clientpositive/spark/join18.q.out ql/src/test/results/clientpositive/spark/join18.q.out index f16fc04..5539986 100644 --- ql/src/test/results/clientpositive/spark/join18.q.out +++ ql/src/test/results/clientpositive/spark/join18.q.out @@ -33,8 +33,8 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) Reducer 5 <- Map 4 (GROUP, 1) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -57,6 +57,24 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Map 4 Map Operator Tree: TableScan @@ -78,23 +96,23 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 2 + Reducer 5 Reduce Operator Tree: Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0) + aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: @@ -117,24 +135,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join18_multi_distinct.q.out ql/src/test/results/clientpositive/spark/join18_multi_distinct.q.out index 2960522..f79ec4d 100644 --- ql/src/test/results/clientpositive/spark/join18_multi_distinct.q.out +++ ql/src/test/results/clientpositive/spark/join18_multi_distinct.q.out @@ -39,8 +39,8 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) Reducer 5 <- Map 4 (GROUP, 1) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -63,6 +63,24 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) Map 4 Map Operator Tree: TableScan @@ -84,24 +102,24 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 2 + Reducer 5 Reduce Operator Tree: Group By Operator - aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0) + aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: bigint) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Join Operator @@ -123,24 +141,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join19.q.out ql/src/test/results/clientpositive/spark/join19.q.out index 5220349..6e2ec97 100644 --- ql/src/test/results/clientpositive/spark/join19.q.out +++ ql/src/test/results/clientpositive/spark/join19.q.out @@ -126,9 +126,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1), Map 8 (PARTITION-LEVEL SORT, 1) Reducer 5 <- Map 7 (PARTITION-LEVEL SORT, 1), Map 9 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -185,41 +185,59 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col0 (type: string) - Map 7 + Map 8 Map Operator Tree: TableScan - alias: t5 + alias: t1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: (((predicate = 'http://www.ontosearch.com/2007/12/ontosofa-ns#_to') and subject is not null) and object is not null) (type: boolean) + predicate: (((predicate = 'http://sofa.semanticweb.org/sofa/v1.0/system#__INSTANCEOF_REL') and (object = 'http://ontos/OntosMiner/Common.English/ontology#Citation')) and subject is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: subject (type: string), object (type: string) - outputColumnNames: _col0, _col1 + expressions: subject (type: string) + outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col1 (type: string) - Map 8 + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col0} + 2 {VALUE._col0} + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Map-reduce partition columns: _col3 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col2 (type: string) + Map 7 Map Operator Tree: TableScan - alias: t1 + alias: t5 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Filter Operator - predicate: (((predicate = 'http://sofa.semanticweb.org/sofa/v1.0/system#__INSTANCEOF_REL') and (object = 'http://ontos/OntosMiner/Common.English/ontology#Citation')) and subject is not null) (type: boolean) + predicate: (((predicate = 'http://www.ontosearch.com/2007/12/ontosofa-ns#_to') and subject is not null) and object is not null) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: subject (type: string) - outputColumnNames: _col0 + expressions: subject (type: string), object (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: string) Map 9 Map Operator Tree: TableScan @@ -237,6 +255,24 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + condition expressions: + 0 {VALUE._col0} {VALUE._col2} {KEY.reducesinkkey0} + 1 + 2 {VALUE._col0} + outputColumnNames: _col0, _col2, _col3, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col7 (type: string) + sort order: + + Map-reduce partition columns: _col7 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string) Reducer 2 Reduce Operator Tree: Join Operator @@ -258,42 +294,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col0} - 2 {VALUE._col0} - outputColumnNames: _col0, _col2, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: string) - sort order: + - Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col0 (type: string), _col2 (type: string) - Reducer 5 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 1 to 2 - condition expressions: - 0 {VALUE._col0} {VALUE._col2} {KEY.reducesinkkey0} - 1 - 2 {VALUE._col0} - outputColumnNames: _col0, _col2, _col3, _col7 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col7 (type: string) - sort order: + - Map-reduce partition columns: _col7 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string) Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join2.q.out ql/src/test/results/clientpositive/spark/join2.q.out index bff76ef..b2e6471 100644 --- ql/src/test/results/clientpositive/spark/join2.q.out +++ ql/src/test/results/clientpositive/spark/join2.q.out @@ -41,20 +41,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 4 - Map Operator Tree: - TableScan - alias: src3 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(key) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(key) (type: double) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) Map 5 Map Operator Tree: TableScan @@ -87,6 +73,20 @@ STAGE PLANS: Map-reduce partition columns: (_col0 + _col5) (type: double) Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: src3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Reducer 3 Reduce Operator Tree: Join Operator diff --git ql/src/test/results/clientpositive/spark/join22.q.out ql/src/test/results/clientpositive/spark/join22.q.out index d108173..65baaff 100644 --- ql/src/test/results/clientpositive/spark/join22.q.out +++ ql/src/test/results/clientpositive/spark/join22.q.out @@ -43,19 +43,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) - Map 5 - Map Operator Tree: - TableScan - alias: src4 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -76,6 +63,19 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: src4 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Join Operator diff --git ql/src/test/results/clientpositive/spark/join28.q.out ql/src/test/results/clientpositive/spark/join28.q.out index 0349069..d163341 100644 --- ql/src/test/results/clientpositive/spark/join28.q.out +++ ql/src/test/results/clientpositive/spark/join28.q.out @@ -70,20 +70,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 5 - Map Operator Tree: - TableScan - alias: z - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) Reducer 2 Reduce Operator Tree: Join Operator @@ -103,6 +89,20 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: z + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Reducer 3 Reduce Operator Tree: Join Operator diff --git ql/src/test/results/clientpositive/spark/join29.q.out ql/src/test/results/clientpositive/spark/join29.q.out index 374f89c..b8e690f 100644 --- ql/src/test/results/clientpositive/spark/join29.q.out +++ ql/src/test/results/clientpositive/spark/join29.q.out @@ -37,8 +37,8 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) Reducer 5 <- Map 4 (GROUP, 1) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -65,6 +65,24 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Map 4 Map Operator Tree: TableScan @@ -89,23 +107,23 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 2 + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: @@ -129,24 +147,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 - Reducer 5 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/join31.q.out ql/src/test/results/clientpositive/spark/join31.q.out index 00c2cba..a7390a3 100644 --- ql/src/test/results/clientpositive/spark/join31.q.out +++ ql/src/test/results/clientpositive/spark/join31.q.out @@ -39,9 +39,9 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 1) + Reducer 6 <- Map 5 (GROUP, 1) Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Reducer 3 (GROUP, 1) - Reducer 6 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -68,6 +68,23 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE Map 5 Map Operator Tree: TableScan @@ -92,23 +109,23 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 2 + Reducer 6 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Join Operator @@ -155,23 +172,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 - Reducer 6 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/join32.q.out ql/src/test/results/clientpositive/spark/join32.q.out index 8c538fc..00c6b2c 100644 --- ql/src/test/results/clientpositive/spark/join32.q.out +++ ql/src/test/results/clientpositive/spark/join32.q.out @@ -109,8 +109,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -315,6 +315,25 @@ STAGE PLANS: name: default.src1 Truncated Path -> Alias: /src1 [x] + Reducer 4 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col6 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col0 (type: string), _col6 (type: string) + auto parallelism: false Reducer 2 Needs Tagging: true Reduce Operator Tree: @@ -356,25 +375,6 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false - Reducer 4 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col6 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col0 (type: string), _col6 (type: string) - auto parallelism: false Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/join32_lessSize.q.out ql/src/test/results/clientpositive/spark/join32_lessSize.q.out index 00d3c63..eecc9f8 100644 --- ql/src/test/results/clientpositive/spark/join32_lessSize.q.out +++ ql/src/test/results/clientpositive/spark/join32_lessSize.q.out @@ -117,8 +117,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -323,6 +323,25 @@ STAGE PLANS: name: default.src1 Truncated Path -> Alias: /src1 [x] + Reducer 4 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col6 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col0 (type: string), _col6 (type: string) + auto parallelism: false Reducer 2 Needs Tagging: true Reduce Operator Tree: @@ -364,25 +383,6 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false - Reducer 4 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col6 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col0 (type: string), _col6 (type: string) - auto parallelism: false Stage: Stage-2 Dependency Collection @@ -696,23 +696,23 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [w] - Map 4 + Map 6 Map Operator Tree: TableScan - alias: z + alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + predicate: (value is not null and key is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: key (type: string) + key expressions: value (type: string) sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - tag: 2 - value expressions: value (type: string) + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: key (type: string) auto parallelism: false Path -> Alias: #### A masked pattern was here #### @@ -762,23 +762,41 @@ STAGE PLANS: name: default.src1 name: default.src1 Truncated Path -> Alias: - /src1 [z] - Map 5 + /src1 [x] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 {VALUE._col0} + outputColumnNames: _col5 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col5 (type: string) + sort order: + + Map-reduce partition columns: _col5 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false + Map 4 Map Operator Tree: TableScan - alias: y - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: z + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - tag: 1 + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + tag: 2 value expressions: value (type: string) auto parallelism: false Path -> Alias: @@ -786,7 +804,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: src1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -796,14 +814,14 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -816,44 +834,44 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.src1 + name: default.src1 Truncated Path -> Alias: - /src [y] - Map 6 + /src1 [z] + Map 5 Map Operator Tree: TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: (value is not null and key is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: value (type: string) + key expressions: key (type: string) sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE tag: 1 - value expressions: key (type: string) + value expressions: value (type: string) auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: src1 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -863,14 +881,14 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -883,38 +901,20 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src1 - name: default.src1 + name: default.src + name: default.src Truncated Path -> Alias: - /src1 [x] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 {VALUE._col0} - outputColumnNames: _col5 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col5 (type: string) - sort order: + - Map-reduce partition columns: _col5 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false + /src [y] Reducer 3 Needs Tagging: true Reduce Operator Tree: @@ -1363,6 +1363,29 @@ STAGE PLANS: name: default.src1 Truncated Path -> Alias: /src1 [x] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col0 (type: string) + auto parallelism: false Map 5 Map Operator Tree: TableScan @@ -1431,29 +1454,6 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [z] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col0 (type: string) - auto parallelism: false Reducer 3 Needs Tagging: true Reduce Operator Tree: @@ -1888,6 +1888,29 @@ STAGE PLANS: name: default.src1 Truncated Path -> Alias: /src1 [x] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col0 (type: string) + auto parallelism: false Map 5 Map Operator Tree: TableScan @@ -1956,29 +1979,6 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [z] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col0 (type: string) - auto parallelism: false Reducer 3 Needs Tagging: true Reduce Operator Tree: @@ -2241,19 +2241,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) - Map 5 - Map Operator Tree: - TableScan - alias: x - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -2274,6 +2261,19 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Join Operator @@ -2482,19 +2482,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) - Map 5 - Map Operator Tree: - TableScan - alias: y - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -2515,6 +2502,19 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Join Operator diff --git ql/src/test/results/clientpositive/spark/join33.q.out ql/src/test/results/clientpositive/spark/join33.q.out index 8c538fc..00c6b2c 100644 --- ql/src/test/results/clientpositive/spark/join33.q.out +++ ql/src/test/results/clientpositive/spark/join33.q.out @@ -109,8 +109,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -315,6 +315,25 @@ STAGE PLANS: name: default.src1 Truncated Path -> Alias: /src1 [x] + Reducer 4 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col6 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col0 (type: string), _col6 (type: string) + auto parallelism: false Reducer 2 Needs Tagging: true Reduce Operator Tree: @@ -356,25 +375,6 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false - Reducer 4 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col6 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col0 (type: string), _col6 (type: string) - auto parallelism: false Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/join34.q.out ql/src/test/results/clientpositive/spark/join34.q.out index ad52ccd..a1cf7eb 100644 --- ql/src/test/results/clientpositive/spark/join34.q.out +++ ql/src/test/results/clientpositive/spark/join34.q.out @@ -148,8 +148,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Union 2 (PARTITION-LEVEL SORT, 1) Union 2 <- Map 1 (NONE, 0), Map 4 (NONE, 0) + Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Union 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -286,6 +286,8 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [x1] + Union 2 + Vertex: Union 2 Map 5 Map Operator Tree: TableScan @@ -394,8 +396,6 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false - Union 2 - Vertex: Union 2 Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/join35.q.out ql/src/test/results/clientpositive/spark/join35.q.out index 53ab4c7..b162cee 100644 --- ql/src/test/results/clientpositive/spark/join35.q.out +++ ql/src/test/results/clientpositive/spark/join35.q.out @@ -157,9 +157,9 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 1) - Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 1), Union 3 (PARTITION-LEVEL SORT, 1) Reducer 6 <- Map 5 (GROUP, 1) Union 3 <- Reducer 2 (NONE, 0), Reducer 6 (NONE, 0) + Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 1), Union 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -239,6 +239,24 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [x] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + tag: 0 + value expressions: _col1 (type: bigint) + auto parallelism: false Map 5 Map Operator Tree: TableScan @@ -316,6 +334,26 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [x1] + Reducer 6 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + tag: 0 + value expressions: _col1 (type: bigint) + auto parallelism: false + Union 3 + Vertex: Union 3 Map 7 Map Operator Tree: TableScan @@ -383,24 +421,6 @@ STAGE PLANS: name: default.src1 Truncated Path -> Alias: /src1 [x] - Reducer 2 - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - tag: 0 - value expressions: _col1 (type: bigint) - auto parallelism: false Reducer 4 Needs Tagging: true Reduce Operator Tree: @@ -442,26 +462,6 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false - Reducer 6 - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - tag: 0 - value expressions: _col1 (type: bigint) - auto parallelism: false - Union 3 - Vertex: Union 3 Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out index 0960ffc..444dfaa 100644 --- ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out +++ ql/src/test/results/clientpositive/spark/join_alt_syntax.q.out @@ -263,8 +263,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -301,6 +301,25 @@ STAGE PLANS: sort order: Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: p_partkey (type: int), p_name (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} {VALUE._col1} + outputColumnNames: _col0, _col1, _col12, _col13 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col12 + _col0) = _col0) and _col13 is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col12 (type: int) Reducer 2 Reduce Operator Tree: Join Operator @@ -325,25 +344,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} - 1 {VALUE._col0} {VALUE._col1} - outputColumnNames: _col0, _col1, _col12, _col13 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((_col12 + _col0) = _col0) and _col13 is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col13 (type: string) - sort order: + - Map-reduce partition columns: _col13 (type: string) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col12 (type: int) Stage: Stage-0 Fetch Operator @@ -369,9 +369,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -427,30 +427,22 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: p_name (type: string), p_partkey (type: int) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 6 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col11} {VALUE._col12} {VALUE._col24} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col12, _col13, _col25, _col36, _col37 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((_col13 = _col25) and (_col0 = _col36)) and (_col0 = _col12)) (type: boolean) - Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col13 (type: string), _col25 (type: string), _col37 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey1} {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col12, _col13 + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col12 (type: int) Reducer 4 Reduce Operator Tree: Join Operator @@ -467,22 +459,30 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col12 (type: int), _col13 (type: string), _col25 (type: string) - Reducer 6 + Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 condition expressions: - 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} - 1 {KEY.reducesinkkey1} {KEY.reducesinkkey0} - outputColumnNames: _col0, _col1, _col12, _col13 - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col13 (type: string) - sort order: + - Map-reduce partition columns: _col13 (type: string) - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col12 (type: int) + 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col11} {VALUE._col12} {VALUE._col24} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col12, _col13, _col25, _col36, _col37 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col13 = _col25) and (_col0 = _col36)) and (_col0 = _col12)) (type: boolean) + Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col13 (type: string), _col25 (type: string), _col37 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -508,9 +508,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -566,30 +566,22 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: p_name (type: string), p_partkey (type: int) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 6 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col11} {VALUE._col12} {VALUE._col24} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col12, _col13, _col25, _col36, _col37 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((_col13 = _col25) and (_col0 = _col36)) and (_col0 = _col12)) (type: boolean) - Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col13 (type: string), _col25 (type: string), _col37 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey1} {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col12, _col13 + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col12 (type: int) Reducer 4 Reduce Operator Tree: Join Operator @@ -606,22 +598,30 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col12 (type: int), _col13 (type: string), _col25 (type: string) - Reducer 6 + Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 condition expressions: - 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} - 1 {KEY.reducesinkkey1} {KEY.reducesinkkey0} - outputColumnNames: _col0, _col1, _col12, _col13 - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col13 (type: string) - sort order: + - Map-reduce partition columns: _col13 (type: string) - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col12 (type: int) + 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col11} {VALUE._col12} {VALUE._col24} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col12, _col13, _col25, _col36, _col37 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col13 = _col25) and (_col0 = _col36)) and (_col0 = _col12)) (type: boolean) + Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col13 (type: string), _col25 (type: string), _col37 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_cond_pushdown_1.q.out ql/src/test/results/clientpositive/spark/join_cond_pushdown_1.q.out index 061e5e8..c0f6c61 100644 --- ql/src/test/results/clientpositive/spark/join_cond_pushdown_1.q.out +++ ql/src/test/results/clientpositive/spark/join_cond_pushdown_1.q.out @@ -190,8 +190,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -229,6 +229,25 @@ STAGE PLANS: sort order: Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col12 + _col0) = _col0) and _col13 is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) Reducer 2 Reduce Operator Tree: Join Operator @@ -250,25 +269,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} - 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((_col12 + _col0) = _col0) and _col13 is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col13 (type: string) - sort order: + - Map-reduce partition columns: _col13 (type: string) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) Stage: Stage-0 Fetch Operator @@ -290,8 +290,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -329,6 +329,22 @@ STAGE PLANS: sort order: Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + 1 {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) Reducer 2 Reduce Operator Tree: Join Operator @@ -350,22 +366,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} - 1 {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col13 (type: string) - sort order: + - Map-reduce partition columns: _col13 (type: string) - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_cond_pushdown_2.q.out ql/src/test/results/clientpositive/spark/join_cond_pushdown_2.q.out index 3b92924..9dfbc08 100644 --- ql/src/test/results/clientpositive/spark/join_cond_pushdown_2.q.out +++ ql/src/test/results/clientpositive/spark/join_cond_pushdown_2.q.out @@ -119,9 +119,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -180,27 +180,22 @@ STAGE PLANS: Map-reduce partition columns: p_name (type: string), p_partkey (type: int) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE value expressions: p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Reducer 2 + Reducer 6 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col11} {VALUE._col12} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} {VALUE._col23} {VALUE._col24} {VALUE._col25} {VALUE._col26} {VALUE._col27} {VALUE._col28} {VALUE._col29} {VALUE._col30} {VALUE._col31} - 1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} + 1 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) Reducer 4 Reduce Operator Tree: Join Operator @@ -217,22 +212,27 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) - Reducer 6 + Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 condition expressions: - 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} - 1 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col13 (type: string) - sort order: + - Map-reduce partition columns: _col13 (type: string) - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col11} {VALUE._col12} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} {VALUE._col23} {VALUE._col24} {VALUE._col25} {VALUE._col26} {VALUE._col27} {VALUE._col28} {VALUE._col29} {VALUE._col30} {VALUE._col31} + 1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out index d50e99e..0bbe193 100644 --- ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out +++ ql/src/test/results/clientpositive/spark/join_cond_pushdown_3.q.out @@ -202,8 +202,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -241,6 +241,25 @@ STAGE PLANS: sort order: Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col12 + _col0) = _col0) and _col13 is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) Reducer 2 Reduce Operator Tree: Join Operator @@ -265,25 +284,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} - 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((_col12 + _col0) = _col0) and _col13 is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col13 (type: string) - sort order: + - Map-reduce partition columns: _col13 (type: string) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) Stage: Stage-0 Fetch Operator @@ -307,8 +307,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -346,6 +346,22 @@ STAGE PLANS: sort order: Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + 1 {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) Reducer 2 Reduce Operator Tree: Join Operator @@ -370,22 +386,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} - 1 {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col13 (type: string) - sort order: + - Map-reduce partition columns: _col13 (type: string) - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_cond_pushdown_4.q.out ql/src/test/results/clientpositive/spark/join_cond_pushdown_4.q.out index 9c1d2e6..ab4ee6f 100644 --- ql/src/test/results/clientpositive/spark/join_cond_pushdown_4.q.out +++ ql/src/test/results/clientpositive/spark/join_cond_pushdown_4.q.out @@ -126,9 +126,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -187,30 +187,22 @@ STAGE PLANS: Map-reduce partition columns: p_name (type: string), p_partkey (type: int) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE value expressions: p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Reducer 2 + Reducer 6 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col11} {VALUE._col12} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} {VALUE._col23} {VALUE._col24} {VALUE._col25} {VALUE._col26} {VALUE._col27} {VALUE._col28} {VALUE._col29} {VALUE._col30} {VALUE._col31} - 1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 - Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((_col13 = _col25) and (_col0 = _col36)) and (_col0 = _col12)) (type: boolean) - Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} + 1 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) Reducer 4 Reduce Operator Tree: Join Operator @@ -227,22 +219,30 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) - Reducer 6 + Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 condition expressions: - 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} - 1 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col13 (type: string) - sort order: + - Map-reduce partition columns: _col13 (type: string) - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col11} {VALUE._col12} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} {VALUE._col23} {VALUE._col24} {VALUE._col25} {VALUE._col26} {VALUE._col27} {VALUE._col28} {VALUE._col29} {VALUE._col30} {VALUE._col31} + 1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 + Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col13 = _col25) and (_col0 = _col36)) and (_col0 = _col12)) (type: boolean) + Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out index 25dcfe5..9ee9541 100644 --- ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out +++ ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual1.q.out @@ -246,8 +246,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -285,6 +285,25 @@ STAGE PLANS: sort order: Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col12 + _col0) = _col0) and _col13 is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) Reducer 2 Reduce Operator Tree: Join Operator @@ -306,25 +325,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} - 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((_col12 + _col0) = _col0) and _col13 is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col13 (type: string) - sort order: + - Map-reduce partition columns: _col13 (type: string) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) Stage: Stage-0 Fetch Operator @@ -346,8 +346,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -385,6 +385,22 @@ STAGE PLANS: sort order: Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + 1 {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) Reducer 2 Reduce Operator Tree: Join Operator @@ -406,22 +422,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} - 1 {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col13 (type: string) - sort order: + - Map-reduce partition columns: _col13 (type: string) - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual2.q.out ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual2.q.out index bf32c7e..ea9c12f 100644 --- ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual2.q.out +++ ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual2.q.out @@ -175,9 +175,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -236,27 +236,22 @@ STAGE PLANS: Map-reduce partition columns: p_name (type: string), p_partkey (type: int) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE value expressions: p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Reducer 2 + Reducer 6 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col11} {VALUE._col12} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} {VALUE._col23} {VALUE._col24} {VALUE._col25} {VALUE._col26} {VALUE._col27} {VALUE._col28} {VALUE._col29} {VALUE._col30} {VALUE._col31} - 1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} + 1 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) Reducer 4 Reduce Operator Tree: Join Operator @@ -273,22 +268,27 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 7 Data size: 1024 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) - Reducer 6 + Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 condition expressions: - 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} - 1 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col13 (type: string) - sort order: + - Map-reduce partition columns: _col13 (type: string) - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col11} {VALUE._col12} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} {VALUE._col23} {VALUE._col24} {VALUE._col25} {VALUE._col26} {VALUE._col27} {VALUE._col28} {VALUE._col29} {VALUE._col30} {VALUE._col31} + 1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual3.q.out ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual3.q.out index 3dc3481..8781d6b 100644 --- ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual3.q.out +++ ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual3.q.out @@ -258,8 +258,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -297,6 +297,25 @@ STAGE PLANS: sort order: Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col12 + _col0) = _col0) and _col13 is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) Reducer 2 Reduce Operator Tree: Join Operator @@ -321,25 +340,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} - 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((_col12 + _col0) = _col0) and _col13 is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col13 (type: string) - sort order: + - Map-reduce partition columns: _col13 (type: string) - Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) Stage: Stage-0 Fetch Operator @@ -363,8 +363,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -402,6 +402,22 @@ STAGE PLANS: sort order: Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + 1 {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) Reducer 2 Reduce Operator Tree: Join Operator @@ -426,22 +442,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} - 1 {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col8} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col13 (type: string) - sort order: + - Map-reduce partition columns: _col13 (type: string) - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual4.q.out ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual4.q.out index e55c192..c7fe754 100644 --- ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual4.q.out +++ ql/src/test/results/clientpositive/spark/join_cond_pushdown_unqual4.q.out @@ -182,9 +182,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -243,30 +243,22 @@ STAGE PLANS: Map-reduce partition columns: p_name (type: string), p_partkey (type: int) Statistics: Num rows: 7 Data size: 847 Basic stats: COMPLETE Column stats: NONE value expressions: p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - Reducer 2 + Reducer 6 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col11} {VALUE._col12} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} {VALUE._col23} {VALUE._col24} {VALUE._col25} {VALUE._col26} {VALUE._col27} {VALUE._col28} {VALUE._col29} {VALUE._col30} {VALUE._col31} - 1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 - Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((_col13 = _col25) and (_col0 = _col36)) and (_col0 = _col12)) (type: boolean) - Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 - Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} + 1 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col13 (type: string) + sort order: + + Map-reduce partition columns: _col13 (type: string) + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) Reducer 4 Reduce Operator Tree: Join Operator @@ -283,22 +275,30 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 7 Data size: 1024 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string) - Reducer 6 + Reducer 2 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 condition expressions: - 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} - 1 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col13 (type: string) - sort order: + - Map-reduce partition columns: _col13 (type: string) - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string) + 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} {VALUE._col11} {VALUE._col12} {VALUE._col13} {VALUE._col14} {VALUE._col15} {VALUE._col16} {VALUE._col17} {VALUE._col18} {VALUE._col19} {VALUE._col23} {VALUE._col24} {VALUE._col25} {VALUE._col26} {VALUE._col27} {VALUE._col28} {VALUE._col29} {VALUE._col30} {VALUE._col31} + 1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} {VALUE._col7} + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((_col13 = _col25) and (_col0 = _col36)) and (_col0 = _col12)) (type: boolean) + Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string), _col24 (type: int), _col25 (type: string), _col26 (type: string), _col27 (type: string), _col28 (type: string), _col29 (type: int), _col30 (type: string), _col31 (type: double), _col32 (type: string), _col36 (type: int), _col37 (type: string), _col38 (type: string), _col39 (type: string), _col40 (type: string), _col41 (type: int), _col42 (type: string), _col43 (type: double), _col44 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35 + Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 123 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_hive_626.q.out ql/src/test/results/clientpositive/spark/join_hive_626.q.out index f58f1ce..ca83408 100644 --- ql/src/test/results/clientpositive/spark/join_hive_626.q.out +++ ql/src/test/results/clientpositive/spark/join_hive_626.q.out @@ -90,20 +90,6 @@ STAGE PLANS: Map-reduce partition columns: foo_id (type: int) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: foo_name (type: string) - Map 4 - Map Operator Tree: - TableScan - alias: hive_count - Statistics: Num rows: 0 Data size: 5 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: bar_id is not null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: bar_id (type: int) - sort order: + - Map-reduce partition columns: bar_id (type: int) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: n (type: int) Map 5 Map Operator Tree: TableScan @@ -134,6 +120,20 @@ STAGE PLANS: Map-reduce partition columns: _col9 (type: int) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: string), _col13 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: hive_count + Statistics: Num rows: 0 Data size: 5 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: bar_id is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: bar_id (type: int) + sort order: + + Map-reduce partition columns: bar_id (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: n (type: int) Reducer 3 Reduce Operator Tree: Join Operator diff --git ql/src/test/results/clientpositive/spark/join_reorder.q.out ql/src/test/results/clientpositive/spark/join_reorder.q.out index 5ff56fb..5072a52 100644 --- ql/src/test/results/clientpositive/spark/join_reorder.q.out +++ ql/src/test/results/clientpositive/spark/join_reorder.q.out @@ -256,16 +256,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Map 4 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: val (type: string) - sort order: + - Map-reduce partition columns: val (type: string) - Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE Map 5 Map Operator Tree: TableScan @@ -293,6 +283,16 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: string), _col5 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: val (type: string) + sort order: + + Map-reduce partition columns: val (type: string) + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE Reducer 3 Reduce Operator Tree: Join Operator @@ -353,16 +353,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Map 4 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: val (type: string) - sort order: + - Map-reduce partition columns: val (type: string) - Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE Map 5 Map Operator Tree: TableScan @@ -390,6 +380,16 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 0 Data size: 33 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: string), _col5 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: val (type: string) + sort order: + + Map-reduce partition columns: val (type: string) + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE Reducer 3 Reduce Operator Tree: Join Operator diff --git ql/src/test/results/clientpositive/spark/join_reorder2.q.out ql/src/test/results/clientpositive/spark/join_reorder2.q.out index ce57266..0243881 100644 --- ql/src/test/results/clientpositive/spark/join_reorder2.q.out +++ ql/src/test/results/clientpositive/spark/join_reorder2.q.out @@ -214,9 +214,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) Reducer 5 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -247,20 +247,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) - Map 6 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: val is not null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: val (type: string) - sort order: + - Map-reduce partition columns: val (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: key (type: string) Map 7 Map Operator Tree: TableScan @@ -275,27 +261,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col5} {VALUE._col6} {VALUE._col10} {VALUE._col11} - 1 {VALUE._col0} {VALUE._col1} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Reduce Operator Tree: Join Operator @@ -312,6 +277,20 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col0 (type: string), _col5 (type: string), _col6 (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: val is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: val (type: string) + sort order: + + Map-reduce partition columns: val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string) Reducer 5 Reduce Operator Tree: Join Operator @@ -328,6 +307,27 @@ STAGE PLANS: Map-reduce partition columns: (_col0 + 1) (type: double) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col5} {VALUE._col6} {VALUE._col10} {VALUE._col11} + 1 {VALUE._col0} {VALUE._col1} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_reorder3.q.out ql/src/test/results/clientpositive/spark/join_reorder3.q.out index 16ad478..6e3204a 100644 --- ql/src/test/results/clientpositive/spark/join_reorder3.q.out +++ ql/src/test/results/clientpositive/spark/join_reorder3.q.out @@ -214,9 +214,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) Reducer 5 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -247,20 +247,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) - Map 6 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: val is not null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: val (type: string) - sort order: + - Map-reduce partition columns: val (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: key (type: string) Map 7 Map Operator Tree: TableScan @@ -275,27 +261,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col5} {VALUE._col6} {VALUE._col10} {VALUE._col11} - 1 {VALUE._col0} {VALUE._col1} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Reduce Operator Tree: Join Operator @@ -312,6 +277,20 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col0 (type: string), _col5 (type: string), _col6 (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: val is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: val (type: string) + sort order: + + Map-reduce partition columns: val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string) Reducer 5 Reduce Operator Tree: Join Operator @@ -328,6 +307,27 @@ STAGE PLANS: Map-reduce partition columns: (_col0 + 1) (type: double) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col5} {VALUE._col6} {VALUE._col10} {VALUE._col11} + 1 {VALUE._col0} {VALUE._col1} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_star.q.out ql/src/test/results/clientpositive/spark/join_star.q.out index 430b699..de01ddc 100644 --- ql/src/test/results/clientpositive/spark/join_star.q.out +++ ql/src/test/results/clientpositive/spark/join_star.q.out @@ -229,8 +229,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -275,6 +275,22 @@ STAGE PLANS: Map-reduce partition columns: d1 (type: int) Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: NONE value expressions: m1 (type: int), m2 (type: int), d2 (type: int) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col3, _col8 + Statistics: Num rows: 2 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 2 Data size: 35 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int) Reducer 2 Reduce Operator Tree: Join Operator @@ -296,22 +312,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col3, _col8 - Statistics: Num rows: 2 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: int) - sort order: + - Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 2 Data size: 35 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int) Stage: Stage-0 Fetch Operator @@ -353,8 +353,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -399,6 +399,22 @@ STAGE PLANS: Map-reduce partition columns: d1 (type: int) Statistics: Num rows: 4 Data size: 49 Basic stats: COMPLETE Column stats: NONE value expressions: m1 (type: int), m2 (type: int) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col8 + Statistics: Num rows: 4 Data size: 53 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col8 (type: int) + sort order: + + Map-reduce partition columns: _col8 (type: int) + Statistics: Num rows: 4 Data size: 53 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) Reducer 2 Reduce Operator Tree: Join Operator @@ -420,22 +436,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col8 - Statistics: Num rows: 4 Data size: 53 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col8 (type: int) - sort order: + - Map-reduce partition columns: _col8 (type: int) - Statistics: Num rows: 4 Data size: 53 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int) Stage: Stage-0 Fetch Operator @@ -477,8 +477,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -514,6 +514,22 @@ STAGE PLANS: Map-reduce partition columns: d1 (type: int) Statistics: Num rows: 8 Data size: 98 Basic stats: COMPLETE Column stats: NONE value expressions: m1 (type: int), m2 (type: int) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col8 + Statistics: Num rows: 8 Data size: 107 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col8 (type: int) + sort order: + + Map-reduce partition columns: _col8 (type: int) + Statistics: Num rows: 8 Data size: 107 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) Reducer 2 Reduce Operator Tree: Join Operator @@ -535,22 +551,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col8 - Statistics: Num rows: 8 Data size: 107 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col8 (type: int) - sort order: + - Map-reduce partition columns: _col8 (type: int) - Statistics: Num rows: 8 Data size: 107 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int) Stage: Stage-0 Fetch Operator @@ -608,12 +608,12 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: + Reducer 9 <- Map 10 (PARTITION-LEVEL SORT, 1), Map 8 (PARTITION-LEVEL SORT, 1) + Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 9 (PARTITION-LEVEL SORT, 1) Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 7 (PARTITION-LEVEL SORT, 1) Reducer 3 <- Map 12 (PARTITION-LEVEL SORT, 1), Map 14 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 13 (PARTITION-LEVEL SORT, 1), Reducer 3 (PARTITION-LEVEL SORT, 1) Reducer 5 <- Map 11 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) - Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 9 (PARTITION-LEVEL SORT, 1) - Reducer 9 <- Map 10 (PARTITION-LEVEL SORT, 1), Map 8 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -627,61 +627,6 @@ STAGE PLANS: Map-reduce partition columns: f5 (type: int) Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: f6 (type: int) - Map 10 - Map Operator Tree: - TableScan - alias: fact - Statistics: Num rows: 6 Data size: 98 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: d1 (type: int) - sort order: + - Map-reduce partition columns: d1 (type: int) - Statistics: Num rows: 6 Data size: 98 Basic stats: COMPLETE Column stats: NONE - value expressions: m1 (type: int), m2 (type: int), d2 (type: int) - Map 11 - Map Operator Tree: - TableScan - alias: dim7 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: f13 (type: int) - sort order: + - Map-reduce partition columns: f13 (type: int) - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: f14 (type: int) - Map 12 - Map Operator Tree: - TableScan - alias: dim6 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: f11 (type: int) - sort order: + - Map-reduce partition columns: f11 (type: int) - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: f12 (type: int) - Map 13 - Map Operator Tree: - TableScan - alias: dim5 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: f9 (type: int) - sort order: + - Map-reduce partition columns: f9 (type: int) - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: f10 (type: int) - Map 14 - Map Operator Tree: - TableScan - alias: dim4 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: f7 (type: int) - sort order: + - Map-reduce partition columns: f7 (type: int) - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: f8 (type: int) Map 6 Map Operator Tree: TableScan @@ -704,6 +649,49 @@ STAGE PLANS: Map-reduce partition columns: f1 (type: int) Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: f2 (type: int) + Map 10 + Map Operator Tree: + TableScan + alias: fact + Statistics: Num rows: 6 Data size: 98 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: d1 (type: int) + sort order: + + Map-reduce partition columns: d1 (type: int) + Statistics: Num rows: 6 Data size: 98 Basic stats: COMPLETE Column stats: NONE + value expressions: m1 (type: int), m2 (type: int), d2 (type: int) + Reducer 9 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col3, _col8 + Statistics: Num rows: 6 Data size: 107 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col8 (type: int) + sort order: + + Map-reduce partition columns: _col8 (type: int) + Statistics: Num rows: 6 Data size: 107 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int) + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col3} {KEY.reducesinkkey0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col3, _col8, _col13 + Statistics: Num rows: 6 Data size: 117 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + Statistics: Num rows: 6 Data size: 117 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int), _col13 (type: int) Reducer 2 Reduce Operator Tree: Join Operator @@ -720,6 +708,28 @@ STAGE PLANS: Map-reduce partition columns: _col18 (type: int) Statistics: Num rows: 6 Data size: 128 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int), _col13 (type: int) + Map 12 + Map Operator Tree: + TableScan + alias: dim6 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: f11 (type: int) + sort order: + + Map-reduce partition columns: f11 (type: int) + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: f12 (type: int) + Map 14 + Map Operator Tree: + TableScan + alias: dim4 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: f7 (type: int) + sort order: + + Map-reduce partition columns: f7 (type: int) + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: f8 (type: int) Reducer 3 Reduce Operator Tree: Join Operator @@ -738,6 +748,17 @@ STAGE PLANS: Map-reduce partition columns: _col23 (type: int) Statistics: Num rows: 13 Data size: 281 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int), _col13 (type: int), _col18 (type: int), _col28 (type: int) + Map 13 + Map Operator Tree: + TableScan + alias: dim5 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: f9 (type: int) + sort order: + + Map-reduce partition columns: f9 (type: int) + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: f10 (type: int) Reducer 4 Reduce Operator Tree: Join Operator @@ -754,6 +775,17 @@ STAGE PLANS: Map-reduce partition columns: _col28 (type: int) Statistics: Num rows: 14 Data size: 309 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int), _col13 (type: int), _col18 (type: int), _col23 (type: int), _col33 (type: int) + Map 11 + Map Operator Tree: + TableScan + alias: dim7 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: f13 (type: int) + sort order: + + Map-reduce partition columns: f13 (type: int) + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: f14 (type: int) Reducer 5 Reduce Operator Tree: Join Operator @@ -775,38 +807,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col3} {KEY.reducesinkkey0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col3, _col8, _col13 - Statistics: Num rows: 6 Data size: 117 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col3 (type: int) - sort order: + - Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 6 Data size: 117 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int), _col13 (type: int) - Reducer 9 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col3, _col8 - Statistics: Num rows: 6 Data size: 107 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col8 (type: int) - sort order: + - Map-reduce partition columns: _col8 (type: int) - Statistics: Num rows: 6 Data size: 107 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int) Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/join_vc.q.out ql/src/test/results/clientpositive/spark/join_vc.q.out index e4d4724..a454f61 100644 --- ql/src/test/results/clientpositive/spark/join_vc.q.out +++ ql/src/test/results/clientpositive/spark/join_vc.q.out @@ -14,9 +14,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1) Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) Reducer 3 <- Reducer 2 (SORT, 1) - Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -60,6 +60,21 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 {VALUE._col0} + outputColumnNames: _col6 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col6 (type: string) + sort order: + + Map-reduce partition columns: _col6 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -94,21 +109,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 {VALUE._col0} - outputColumnNames: _col6 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col6 (type: string) - sort order: + - Map-reduce partition columns: _col6 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/limit_pushdown.q.out ql/src/test/results/clientpositive/spark/limit_pushdown.q.out index 9ab8982..e20c4d5 100644 --- ql/src/test/results/clientpositive/spark/limit_pushdown.q.out +++ ql/src/test/results/clientpositive/spark/limit_pushdown.q.out @@ -814,9 +814,9 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) Reducer 5 <- Map 4 (GROUP, 1) Reducer 6 <- Reducer 5 (GROUP, 1) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -841,28 +841,6 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 value expressions: _col1 (type: bigint) - Map 4 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.3 - value expressions: _col1 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -887,30 +865,28 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 4 - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map 4 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.3 + value expressions: _col1 (type: bigint) Reducer 5 Reduce Operator Tree: Group By Operator @@ -949,6 +925,30 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 4 + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/load_dyn_part14.q.out ql/src/test/results/clientpositive/spark/load_dyn_part14.q.out index 65f98a9..c2d5908 100644 --- ql/src/test/results/clientpositive/spark/load_dyn_part14.q.out +++ ql/src/test/results/clientpositive/spark/load_dyn_part14.q.out @@ -85,38 +85,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 2 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string) - Map 4 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 'k3' (type: string), ' ' (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 85500 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 2 - Statistics: Num rows: 2 Data size: 342 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 2 Data size: 342 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string) - Map 6 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 'k1' (type: string), UDFToString(null) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 85000 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 2 - Statistics: Num rows: 2 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 2 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string) Reducer 2 Reduce Operator Tree: Select Operator @@ -134,6 +102,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part14 + Map 4 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'k3' (type: string), ' ' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 85500 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 342 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 2 Data size: 342 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string) Reducer 5 Reduce Operator Tree: Select Operator @@ -151,6 +135,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part14 + Map 6 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'k1' (type: string), UDFToString(null) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 85000 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 2 + Statistics: Num rows: 2 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 2 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string) Reducer 7 Reduce Operator Tree: Select Operator diff --git ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out index 03dab8f..cf39293 100644 --- ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out +++ ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out @@ -273,21 +273,21 @@ STAGE PLANS: /srcpart/ds=2008-04-08/hr=12 [srcpart] /srcpart/ds=2008-04-09/hr=11 [srcpart] /srcpart/ds=2008-04-09/hr=12 [srcpart] - Map 4 + Map 5 Map Operator Tree: TableScan - alias: src1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: key (type: string) + key expressions: value (type: string) sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE tag: 1 auto parallelism: false Path -> Alias: @@ -295,7 +295,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: src1 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -305,14 +305,14 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -325,35 +325,53 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src1 - name: default.src1 + name: default.src + name: default.src Truncated Path -> Alias: - /src1 [src1] - Map 5 + /src [src] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + tag: 0 + auto parallelism: false + Map 4 Map Operator Tree: TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: value is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: value (type: string) + key expressions: key (type: string) sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE tag: 1 auto parallelism: false Path -> Alias: @@ -361,7 +379,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: src1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -371,14 +389,14 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -391,38 +409,20 @@ STAGE PLANS: columns.comments defaultdefault columns.types string:string #### A masked pattern was here #### - name default.src + name default.src1 numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.src1 + name: default.src1 Truncated Path -> Alias: - /src [src] - Reducer 2 - Needs Tagging: true - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {VALUE._col0} - 1 - outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: false + /src1 [src1] Reducer 3 Needs Tagging: true Reduce Operator Tree: @@ -498,19 +498,6 @@ STAGE PLANS: Map-reduce partition columns: value (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string) - Map 4 - Map Operator Tree: - TableScan - alias: src1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Map 5 Map Operator Tree: TableScan @@ -539,6 +526,19 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Join Operator @@ -600,19 +600,6 @@ STAGE PLANS: Map-reduce partition columns: value (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string), ds (type: string) - Map 5 - Map Operator Tree: - TableScan - alias: src1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map 6 Map Operator Tree: TableScan @@ -642,6 +629,19 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Join Operator diff --git ql/src/test/results/clientpositive/spark/mapjoin_subquery.q.out ql/src/test/results/clientpositive/spark/mapjoin_subquery.q.out index 46e6be4..7c3baf8 100644 --- ql/src/test/results/clientpositive/spark/mapjoin_subquery.q.out +++ ql/src/test/results/clientpositive/spark/mapjoin_subquery.q.out @@ -56,20 +56,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 5 - Map Operator Tree: - TableScan - alias: z - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) Reducer 2 Reduce Operator Tree: Join Operator @@ -89,6 +75,20 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: z + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Reducer 3 Reduce Operator Tree: Join Operator @@ -298,20 +298,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 5 - Map Operator Tree: - TableScan - alias: z - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) Reducer 2 Reduce Operator Tree: Join Operator @@ -331,6 +317,20 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: z + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Reducer 3 Reduce Operator Tree: Join Operator diff --git ql/src/test/results/clientpositive/spark/mapjoin_subquery2.q.out ql/src/test/results/clientpositive/spark/mapjoin_subquery2.q.out index 69cbd58..32d5c2a 100644 --- ql/src/test/results/clientpositive/spark/mapjoin_subquery2.q.out +++ ql/src/test/results/clientpositive/spark/mapjoin_subquery2.q.out @@ -122,20 +122,6 @@ STAGE PLANS: Map-reduce partition columns: id (type: int) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: name (type: string) - Map 5 - Map Operator Tree: - TableScan - alias: z - Statistics: Num rows: 0 Data size: 6 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: id is not null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: id (type: int) - sort order: + - Map-reduce partition columns: id (type: int) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: name (type: string) Reducer 2 Reduce Operator Tree: Join Operator @@ -156,6 +142,20 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: z + Statistics: Num rows: 0 Data size: 6 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: id (type: int) + sort order: + + Map-reduce partition columns: id (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: name (type: string) Reducer 3 Reduce Operator Tree: Join Operator diff --git ql/src/test/results/clientpositive/spark/mergejoins.q.out ql/src/test/results/clientpositive/spark/mergejoins.q.out index 8ccf70a..3011c5f 100644 --- ql/src/test/results/clientpositive/spark/mergejoins.q.out +++ ql/src/test/results/clientpositive/spark/mergejoins.q.out @@ -68,20 +68,6 @@ STAGE PLANS: Map-reduce partition columns: val1 (type: int) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val2 (type: int) - Map 4 - Map Operator Tree: - TableScan - alias: e - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Filter Operator - predicate: val2 is not null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: val2 (type: int) - sort order: + - Map-reduce partition columns: val2 (type: int) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val1 (type: int) Map 5 Map Operator Tree: TableScan @@ -144,6 +130,20 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col0 (type: int), _col5 (type: int), _col6 (type: int), _col10 (type: int), _col11 (type: int), _col15 (type: int), _col16 (type: int) + Map 4 + Map Operator Tree: + TableScan + alias: e + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Filter Operator + predicate: val2 is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: val2 (type: int) + sort order: + + Map-reduce partition columns: val2 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val1 (type: int) Reducer 3 Reduce Operator Tree: Join Operator diff --git ql/src/test/results/clientpositive/spark/mergejoins_mixed.q.out ql/src/test/results/clientpositive/spark/mergejoins_mixed.q.out index c667fdc..1a2ef8e 100644 --- ql/src/test/results/clientpositive/spark/mergejoins_mixed.q.out +++ ql/src/test/results/clientpositive/spark/mergejoins_mixed.q.out @@ -426,17 +426,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: value (type: string) - Map 5 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) Map 6 Map Operator Tree: TableScan @@ -466,6 +455,17 @@ STAGE PLANS: Map-reduce partition columns: _col6 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col10 (type: string), _col11 (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) Reducer 3 Reduce Operator Tree: Join Operator @@ -534,17 +534,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: value (type: string) - Map 5 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) Map 6 Map Operator Tree: TableScan @@ -574,6 +563,17 @@ STAGE PLANS: Map-reduce partition columns: _col6 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col10 (type: string), _col11 (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) Reducer 3 Reduce Operator Tree: Join Operator @@ -642,17 +642,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: value (type: string) - Map 5 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) Map 6 Map Operator Tree: TableScan @@ -682,6 +671,17 @@ STAGE PLANS: Map-reduce partition columns: _col6 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col10 (type: string), _col11 (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) Reducer 3 Reduce Operator Tree: Join Operator @@ -726,9 +726,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) Reducer 5 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -756,17 +756,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: value (type: string) - Map 6 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) Map 7 Map Operator Tree: TableScan @@ -781,27 +770,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col4} {VALUE._col5} {VALUE._col9} {VALUE._col10} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Reduce Operator Tree: Join Operator @@ -818,6 +786,17 @@ STAGE PLANS: Map-reduce partition columns: _col6 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) Reducer 5 Reduce Operator Tree: Join Operator @@ -834,6 +813,27 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col4} {VALUE._col5} {VALUE._col9} {VALUE._col10} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -855,9 +855,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) Reducer 5 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -885,17 +885,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: value (type: string) - Map 6 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) Map 7 Map Operator Tree: TableScan @@ -910,27 +899,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Outer Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col4} {VALUE._col5} {VALUE._col9} {VALUE._col10} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Reduce Operator Tree: Join Operator @@ -947,6 +915,17 @@ STAGE PLANS: Map-reduce partition columns: _col6 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) Reducer 5 Reduce Operator Tree: Join Operator @@ -963,6 +942,27 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col4} {VALUE._col5} {VALUE._col9} {VALUE._col10} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -984,9 +984,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) Reducer 5 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1014,17 +1014,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: value (type: string) - Map 6 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) Map 7 Map Operator Tree: TableScan @@ -1039,27 +1028,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col4} {VALUE._col5} {VALUE._col9} {VALUE._col10} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Reduce Operator Tree: Join Operator @@ -1076,6 +1044,17 @@ STAGE PLANS: Map-reduce partition columns: _col6 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) Reducer 5 Reduce Operator Tree: Join Operator @@ -1092,6 +1071,27 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col4} {VALUE._col5} {VALUE._col9} {VALUE._col10} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1113,9 +1113,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) Reducer 5 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1143,17 +1143,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: value (type: string) - Map 6 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) Map 7 Map Operator Tree: TableScan @@ -1168,27 +1157,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: value (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Outer Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col4} {VALUE._col5} {VALUE._col9} {VALUE._col10} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Reduce Operator Tree: Join Operator @@ -1205,6 +1173,17 @@ STAGE PLANS: Map-reduce partition columns: _col6 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) Reducer 5 Reduce Operator Tree: Join Operator @@ -1221,6 +1200,27 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col4} {VALUE._col5} {VALUE._col9} {VALUE._col10} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11, _col15, _col16 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string), _col15 (type: string), _col16 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1244,8 +1244,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1273,17 +1273,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: value (type: string) - Map 5 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: value (type: string) Map 6 Map Operator Tree: TableScan @@ -1298,6 +1287,33 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: value (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col6 (type: string) + sort order: + + Map-reduce partition columns: _col6 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) Reducer 2 Reduce Operator Tree: Join Operator @@ -1321,22 +1337,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col6 (type: string) - sort order: + - Map-reduce partition columns: _col6 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out index e054664..93609d9 100644 --- ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out +++ ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out @@ -1614,31 +1614,16 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 4 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 5 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(DISTINCT value) - keys: key (type: string), value (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator expressions: value (type: string), key (type: string) outputColumnNames: value, key Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -1653,7 +1638,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0) @@ -1672,8 +1657,28 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.e1 - Reducer 3 + name: default.e3 + Map 4 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT value) + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0) @@ -1692,7 +1697,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.e3 + name: default.e1 Stage: Stage-3 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/multi_insert_lateral_view.q.out ql/src/test/results/clientpositive/spark/multi_insert_lateral_view.q.out index ef0d3ef..a3f913f 100644 --- ql/src/test/results/clientpositive/spark/multi_insert_lateral_view.q.out +++ ql/src/test/results/clientpositive/spark/multi_insert_lateral_view.q.out @@ -296,11 +296,11 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 4 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: src_10 @@ -356,6 +356,31 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv1 + Map 5 + Map Operator Tree: + TableScan + alias: src_10 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Lateral View Forward Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -407,26 +432,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double) - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv1 Reducer 3 Reduce Operator Tree: Group By Operator @@ -567,11 +572,75 @@ STAGE PLANS: Stage: Stage-3 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Map 1 (SORT, 1) + Reducer 3 <- Map 5 (SORT, 1) + Reducer 2 <- Map 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 5 + Map Operator Tree: + TableScan + alias: src_10 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 200) or (key < 200)) (type: boolean) + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 3 + Reduce Operator Tree: + Forward + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 > 200) (type: boolean) + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv2 + Filter Operator + predicate: (KEY._col0 < 200) (type: boolean) + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv3 + Map 4 Map Operator Tree: TableScan alias: src_10 @@ -627,19 +696,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double) - Filter Operator - predicate: ((key > 200) or (key < 200)) (type: boolean) - Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) Reducer 2 Reduce Operator Tree: Group By Operator @@ -660,52 +716,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv1 - Reducer 3 - Reduce Operator Tree: - Forward - Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (KEY._col0 > 200) (type: boolean) - Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv2 - Filter Operator - predicate: (KEY._col0 < 200) (type: boolean) - Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv3 Stage: Stage-4 Dependency Collection @@ -857,12 +867,52 @@ STAGE PLANS: Stage: Stage-3 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 4 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 6 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 7 + Map Operator Tree: + TableScan + alias: src_10 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string), key (type: string) + outputColumnNames: value, key + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(DISTINCT key) + keys: value (type: string), key (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: sum(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv3 + Map 5 Map Operator Tree: TableScan alias: src_10 @@ -916,6 +966,31 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: sum(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: double), _col1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv1 + Map 6 + Map Operator Tree: + TableScan + alias: src_10 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Lateral View Forward Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -965,41 +1040,6 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string), key (type: string) - outputColumnNames: value, key - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(DISTINCT key) - keys: value (type: string), key (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: sum(DISTINCT KEY._col1:0._col0) - keys: KEY._col0 (type: double) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: double), _col1 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv1 Reducer 3 Reduce Operator Tree: Group By Operator @@ -1020,26 +1060,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv2 - Reducer 4 - Reduce Operator Tree: - Group By Operator - aggregations: sum(DISTINCT KEY._col1:0._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv3 Stage: Stage-4 Dependency Collection @@ -1221,12 +1241,75 @@ STAGE PLANS: Stage: Stage-4 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) - Reducer 4 <- Map 1 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 6 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 7 + Map Operator Tree: + TableScan + alias: src_10 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 200) or (key < 200)) (type: boolean) + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string), key (type: string) + sort order: ++ + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE + Reducer 4 + Reduce Operator Tree: + Forward + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col1:0._col0 > 200) (type: boolean) + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv3 + Filter Operator + predicate: (KEY._col1:0._col0 < 200) (type: boolean) + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv4 + Map 5 Map Operator Tree: TableScan alias: src_10 @@ -1280,6 +1363,31 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: sum(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv1 + Map 6 + Map Operator Tree: + TableScan + alias: src_10 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Lateral View Forward Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -1329,38 +1437,6 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > 200) or (key < 200)) (type: boolean) - Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string), key (type: string) - sort order: ++ - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: sum(DISTINCT KEY._col1:0._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv1 Reducer 3 Reduce Operator Tree: Group By Operator @@ -1381,52 +1457,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv2 - Reducer 4 - Reduce Operator Tree: - Forward - Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (KEY._col1:0._col0 > 200) (type: boolean) - Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(DISTINCT KEY._col1:0._col0) - keys: KEY._col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv3 - Filter Operator - predicate: (KEY._col1:0._col0 < 200) (type: boolean) - Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(DISTINCT KEY._col1:0._col0) - keys: KEY._col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv4 Stage: Stage-5 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/multi_insert_mixed.q.out ql/src/test/results/clientpositive/spark/multi_insert_mixed.q.out index dbf3a00..d85703e 100644 --- ql/src/test/results/clientpositive/spark/multi_insert_mixed.q.out +++ ql/src/test/results/clientpositive/spark/multi_insert_mixed.q.out @@ -54,34 +54,18 @@ STAGE PLANS: Stage: Stage-3 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Reducer 2 (SORT, 1) - Reducer 4 <- Map 1 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP, 1) Reducer 5 <- Reducer 4 (SORT, 1) + Reducer 2 <- Map 6 (GROUP, 1) + Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 7 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Select Operator expressions: value (type: string) outputColumnNames: value Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -97,22 +81,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Filter Operator - predicate: (key < 10) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi3 - Reducer 2 + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -129,7 +98,7 @@ STAGE PLANS: sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 3 + Reducer 5 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) @@ -142,8 +111,44 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi1 - Reducer 4 + name: default.src_multi2 + Map 6 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Filter Operator + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi3 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -160,7 +165,7 @@ STAGE PLANS: sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 5 + Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) @@ -173,7 +178,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + name: default.src_multi1 Stage: Stage-4 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out index db158eb..c40222f 100644 --- ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out +++ ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out @@ -2417,9 +2417,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) - Reducer 4 <- Reducer 2 (SORT, 1) + Reducer 6 <- Map 1 (SORT, 1) + Reducer 4 <- Reducer 6 (SORT, 1) + Reducer 5 <- Map 1 (SORT, 1) + Reducer 3 <- Reducer 5 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2439,29 +2440,11 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 6 Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (KEY._col0 < 10) (type: boolean) - Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -2479,28 +2462,50 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 3 + Reducer 4 Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 5 + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 < 10) (type: boolean) + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 3 Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2551,9 +2556,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) - Reducer 4 <- Reducer 2 (SORT, 1) + Reducer 5 <- Map 1 (SORT, 1) + Reducer 3 <- Reducer 5 (SORT, 1) + Reducer 6 <- Map 1 (SORT, 1) + Reducer 4 <- Reducer 6 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2573,7 +2579,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -2595,6 +2601,23 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -2613,19 +2636,6 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 3 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Reduce Operator Tree: Select Operator @@ -2685,9 +2695,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) - Reducer 4 <- Reducer 2 (SORT, 1) + Reducer 6 <- Map 1 (SORT, 1) + Reducer 4 <- Reducer 6 (SORT, 1) + Reducer 5 <- Map 1 (SORT, 1) + Reducer 3 <- Reducer 5 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2707,29 +2718,11 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 6 Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (KEY._col0 < 10) (type: boolean) - Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -2747,28 +2740,50 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 3 + Reducer 4 Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 5 + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 < 10) (type: boolean) + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 3 Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2819,9 +2834,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) - Reducer 4 <- Reducer 2 (SORT, 1) + Reducer 6 <- Map 1 (SORT, 1) + Reducer 4 <- Reducer 6 (SORT, 1) + Reducer 5 <- Map 1 (SORT, 1) + Reducer 3 <- Reducer 5 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2841,29 +2857,11 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 6 Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (KEY._col0 < 10) (type: boolean) - Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -2881,28 +2879,50 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 3 + Reducer 4 Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 5 + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 < 10) (type: boolean) + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 3 Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2961,9 +2981,10 @@ STAGE PLANS: Stage: Stage-4 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) - Reducer 4 <- Reducer 2 (SORT, 1) + Reducer 5 <- Map 1 (SORT, 1) + Reducer 3 <- Reducer 5 (SORT, 1) + Reducer 6 <- Map 1 (SORT, 1) + Reducer 4 <- Reducer 6 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3013,7 +3034,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -3035,6 +3056,23 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -3053,19 +3091,6 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 3 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Reduce Operator Tree: Select Operator @@ -3206,9 +3231,10 @@ STAGE PLANS: Stage: Stage-4 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) - Reducer 4 <- Reducer 2 (SORT, 1) + Reducer 6 <- Map 1 (SORT, 1) + Reducer 4 <- Reducer 6 (SORT, 1) + Reducer 5 <- Map 1 (SORT, 1) + Reducer 3 <- Reducer 5 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3258,29 +3284,11 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 6 Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (KEY._col0 < 10) (type: boolean) - Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -3298,28 +3306,50 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 3 + Reducer 4 Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 5 + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 < 10) (type: boolean) + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 3 Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3451,9 +3481,10 @@ STAGE PLANS: Stage: Stage-4 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) - Reducer 4 <- Reducer 2 (SORT, 1) + Reducer 6 <- Map 1 (SORT, 1) + Reducer 4 <- Reducer 6 (SORT, 1) + Reducer 5 <- Map 1 (SORT, 1) + Reducer 3 <- Reducer 5 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3503,29 +3534,11 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 6 Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (KEY._col0 < 10) (type: boolean) - Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -3543,28 +3556,50 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 3 + Reducer 4 Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 5 + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 < 10) (type: boolean) + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 3 Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -3696,9 +3731,10 @@ STAGE PLANS: Stage: Stage-4 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) - Reducer 3 <- Reducer 2 (SORT, 1) - Reducer 4 <- Reducer 2 (SORT, 1) + Reducer 5 <- Map 1 (SORT, 1) + Reducer 3 <- Reducer 5 (SORT, 1) + Reducer 6 <- Map 1 (SORT, 1) + Reducer 4 <- Reducer 6 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3748,7 +3784,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE @@ -3770,6 +3806,23 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE @@ -3788,19 +3841,6 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 3 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Reduce Operator Tree: Select Operator diff --git ql/src/test/results/clientpositive/spark/multi_join_union.q.out ql/src/test/results/clientpositive/spark/multi_join_union.q.out index cfc354f..18de240 100644 --- ql/src/test/results/clientpositive/spark/multi_join_union.q.out +++ ql/src/test/results/clientpositive/spark/multi_join_union.q.out @@ -57,8 +57,8 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Union 5 (PARTITION-LEVEL SORT, 1) Union 5 <- Map 4 (NONE, 0), Map 7 (NONE, 0) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Union 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -75,6 +75,36 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col6 (type: string) + sort order: + + Map-reduce partition columns: _col6 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) Map 4 Map Operator Tree: TableScan @@ -92,20 +122,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) value expressions: _col0 (type: string) - Map 6 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) Map 7 Map Operator Tree: TableScan @@ -123,22 +139,8 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) value expressions: _col0 (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col6 (type: string) - sort order: + - Map-reduce partition columns: _col6 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) + Union 5 + Vertex: Union 5 Reducer 3 Reduce Operator Tree: Join Operator @@ -160,8 +162,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 5 - Vertex: Union 5 Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/multigroupby_singlemr.q.out ql/src/test/results/clientpositive/spark/multigroupby_singlemr.q.out index 475f624..d8f2e3f 100644 --- ql/src/test/results/clientpositive/spark/multigroupby_singlemr.q.out +++ ql/src/test/results/clientpositive/spark/multigroupby_singlemr.q.out @@ -60,32 +60,16 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) + Reducer 2 <- Map 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 5 Map Operator Tree: TableScan alias: tbl Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: c1 (type: int), c2 (type: int) - outputColumnNames: c1, c2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(c2) - keys: c1 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col1 (type: bigint) - Select Operator expressions: c1 (type: int), c2 (type: int), c3 (type: int) outputColumnNames: c1, c2, c3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE @@ -101,17 +85,17 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col2 (type: bigint) - Reducer 2 + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: int) + keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: _col0 (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 + expressions: _col0 (type: int), _col1 (type: int), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false @@ -120,18 +104,39 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - Reducer 3 + name: default.dest2 + Map 4 + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: c1 (type: int), c2 (type: int) + outputColumnNames: c1, c2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: count(c2) + keys: c1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: int), KEY._col1 (type: int) + keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: int), UDFToInteger(_col2) (type: int) - outputColumnNames: _col0, _col1, _col2 + expressions: _col0 (type: int), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false @@ -140,7 +145,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 + name: default.dest1 Stage: Stage-3 Dependency Collection @@ -193,11 +198,11 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 4 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: tbl @@ -218,22 +223,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: bigint) - Select Operator - expressions: c2 (type: int), c1 (type: int), c3 (type: int) - outputColumnNames: c2, c1, c3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(c3) - keys: c2 (type: int), c1 (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col2 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -254,6 +243,27 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Map 5 + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: c2 (type: int), c1 (type: int), c3 (type: int) + outputColumnNames: c2, c1, c3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: count(c3) + keys: c2 (type: int), c1 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col2 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -326,32 +336,16 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) + Reducer 2 <- Map 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 5 Map Operator Tree: TableScan alias: tbl Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: c1 (type: int), c2 (type: int), c3 (type: int), c4 (type: int) - outputColumnNames: c1, c2, c3, c4 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(c4) - keys: c1 (type: int), c2 (type: int), c3 (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) - sort order: +++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col3 (type: bigint) - Select Operator expressions: c1 (type: int), c2 (type: int), c3 (type: int) outputColumnNames: c1, c2, c3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE @@ -367,17 +361,17 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col2 (type: bigint) - Reducer 2 + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), UDFToInteger(_col3) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 + expressions: _col0 (type: int), _col1 (type: int), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false @@ -386,18 +380,39 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest3 - Reducer 3 + name: default.dest2 + Map 4 + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: c1 (type: int), c2 (type: int), c3 (type: int), c4 (type: int) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: count(c4) + keys: c1 (type: int), c2 (type: int), c3 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: int), KEY._col1 (type: int) + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: int), UDFToInteger(_col2) (type: int) - outputColumnNames: _col0, _col1, _col2 + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), UDFToInteger(_col3) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false @@ -406,7 +421,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 + name: default.dest3 Stage: Stage-3 Dependency Collection @@ -573,49 +588,17 @@ STAGE PLANS: Stage: Stage-3 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Map 1 (GROUP, 1) - Reducer 4 <- Map 1 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP, 1) + Reducer 3 <- Map 6 (GROUP, 1) + Reducer 2 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 7 Map Operator Tree: TableScan alias: tbl Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: c1 (type: int), c2 (type: int), c3 (type: int), c4 (type: int) - outputColumnNames: c1, c2, c3, c4 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(c4) - keys: c1 (type: int), c2 (type: int), c3 (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) - sort order: +++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col3 (type: bigint) - Select Operator - expressions: c1 (type: int), c2 (type: int), c3 (type: int) - outputColumnNames: c1, c2, c3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count(c3) - keys: c1 (type: int), c2 (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col2 (type: bigint) - Select Operator expressions: c1 (type: int), c2 (type: int) outputColumnNames: c1, c2 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE @@ -631,17 +614,17 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 2 + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), UDFToInteger(_col3) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 + expressions: _col0 (type: int), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false @@ -650,7 +633,28 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest3 + name: default.dest1 + Map 6 + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: c1 (type: int), c2 (type: int), c3 (type: int) + outputColumnNames: c1, c2, c3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: count(c3) + keys: c1 (type: int), c2 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col2 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator @@ -671,17 +675,38 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Reducer 4 + Map 5 + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: c1 (type: int), c2 (type: int), c3 (type: int), c4 (type: int) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: count(c4) + keys: c1 (type: int), c2 (type: int), c3 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: int) + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator - expressions: _col0 (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), UDFToInteger(_col3) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false @@ -690,7 +715,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 + name: default.dest3 Stage: Stage-4 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/ppd_join2.q.out ql/src/test/results/clientpositive/spark/ppd_join2.q.out index b98bc39..821289d 100644 --- ql/src/test/results/clientpositive/spark/ppd_join2.q.out +++ ql/src/test/results/clientpositive/spark/ppd_join2.q.out @@ -76,6 +76,25 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col1 is not null (type: boolean) + Statistics: Num rows: 30 Data size: 321 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 30 Data size: 321 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string) Map 5 Map Operator Tree: TableScan @@ -97,25 +116,6 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: _col1 is not null (type: boolean) - Statistics: Num rows: 30 Data size: 321 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 30 Data size: 321 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string) Reducer 3 Reduce Operator Tree: Join Operator @@ -1763,6 +1763,22 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string) Map 5 Map Operator Tree: TableScan @@ -1781,22 +1797,6 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string) Reducer 3 Reduce Operator Tree: Join Operator diff --git ql/src/test/results/clientpositive/spark/ppd_join4.q.out ql/src/test/results/clientpositive/spark/ppd_join4.q.out index 04e4758..0e57dc1 100644 --- ql/src/test/results/clientpositive/spark/ppd_join4.q.out +++ ql/src/test/results/clientpositive/spark/ppd_join4.q.out @@ -52,8 +52,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 3 (SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -82,6 +82,14 @@ STAGE PLANS: key expressions: 'a' (type: string) sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reducer 4 + Reduce Operator Tree: + Select Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: 'a' (type: string) + sort order: + + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -102,14 +110,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Select Operator - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: 'a' (type: string) - sort order: + - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/ppd_join5.q.out ql/src/test/results/clientpositive/spark/ppd_join5.q.out index 54b6b34..778d173 100644 --- ql/src/test/results/clientpositive/spark/ppd_join5.q.out +++ ql/src/test/results/clientpositive/spark/ppd_join5.q.out @@ -68,18 +68,6 @@ STAGE PLANS: Map-reduce partition columns: id (type: string), id (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: d (type: int) - Map 4 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (d <= 1) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: d (type: int) Map 5 Map Operator Tree: TableScan @@ -107,6 +95,18 @@ STAGE PLANS: sort order: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string), _col6 (type: int) + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d <= 1) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: d (type: int) Reducer 3 Reduce Operator Tree: Join Operator @@ -188,18 +188,6 @@ STAGE PLANS: Map-reduce partition columns: id (type: string), id (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: d (type: int) - Map 5 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (d <= 1) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: d (type: int) Reducer 2 Reduce Operator Tree: Join Operator @@ -214,6 +202,18 @@ STAGE PLANS: sort order: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string), _col6 (type: int) + Map 5 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (d <= 1) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: d (type: int) Reducer 3 Reduce Operator Tree: Join Operator diff --git ql/src/test/results/clientpositive/spark/ppd_join_filter.q.out ql/src/test/results/clientpositive/spark/ppd_join_filter.q.out index 6ee0c8f..3573c77 100644 --- ql/src/test/results/clientpositive/spark/ppd_join_filter.q.out +++ ql/src/test/results/clientpositive/spark/ppd_join_filter.q.out @@ -127,8 +127,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 3 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -274,6 +274,31 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), (_col1 + 1) (type: double), (_col1 + 2) (type: double), (_col1 + 3) (type: double) + outputColumnNames: _col0, _col2, _col3, _col4 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: ((_col2 < 5.0) and _col0 is not null) (type: boolean) + Statistics: Num rows: 21 Data size: 223 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 21 Data size: 223 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col3 (type: double), _col4 (type: double) + auto parallelism: false Reducer 2 Needs Tagging: true Reduce Operator Tree: @@ -310,31 +335,6 @@ STAGE PLANS: TotalFiles: 1 GatherStats: false MultiFileSpray: false - Reducer 4 - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), (_col1 + 1) (type: double), (_col1 + 2) (type: double), (_col1 + 3) (type: double) - outputColumnNames: _col0, _col2, _col3, _col4 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: ((_col2 < 5.0) and _col0 is not null) (type: boolean) - Statistics: Num rows: 21 Data size: 223 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 21 Data size: 223 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col3 (type: double), _col4 (type: double) - auto parallelism: false Stage: Stage-0 Fetch Operator @@ -505,8 +505,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 3 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -652,6 +652,31 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), (_col1 + 1) (type: double), (_col1 + 2) (type: double), (_col1 + 3) (type: double) + outputColumnNames: _col0, _col2, _col3, _col4 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: (_col2 < 5.0) (type: boolean) + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col3 (type: double), _col4 (type: double) + auto parallelism: false Reducer 2 Needs Tagging: true Reduce Operator Tree: @@ -688,31 +713,6 @@ STAGE PLANS: TotalFiles: 1 GatherStats: false MultiFileSpray: false - Reducer 4 - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), (_col1 + 1) (type: double), (_col1 + 2) (type: double), (_col1 + 3) (type: double) - outputColumnNames: _col0, _col2, _col3, _col4 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: (_col2 < 5.0) (type: boolean) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col3 (type: double), _col4 (type: double) - auto parallelism: false Stage: Stage-0 Fetch Operator @@ -883,8 +883,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 3 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1026,6 +1026,31 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), (_col1 + 1) (type: double), (_col1 + 2) (type: double), (_col1 + 3) (type: double) + outputColumnNames: _col0, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: ((_col2 < 5.0) and _col0 is not null) (type: boolean) + Statistics: Num rows: 42 Data size: 446 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 42 Data size: 446 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col3 (type: double), _col4 (type: double) + auto parallelism: false Reducer 2 Needs Tagging: true Reduce Operator Tree: @@ -1062,31 +1087,6 @@ STAGE PLANS: TotalFiles: 1 GatherStats: false MultiFileSpray: false - Reducer 4 - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), (_col1 + 1) (type: double), (_col1 + 2) (type: double), (_col1 + 3) (type: double) - outputColumnNames: _col0, _col2, _col3, _col4 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: ((_col2 < 5.0) and _col0 is not null) (type: boolean) - Statistics: Num rows: 42 Data size: 446 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 42 Data size: 446 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col3 (type: double), _col4 (type: double) - auto parallelism: false Stage: Stage-0 Fetch Operator @@ -1257,8 +1257,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 3 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1404,6 +1404,31 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), (_col1 + 1) (type: double), (_col1 + 2) (type: double), (_col1 + 3) (type: double) + outputColumnNames: _col0, _col2, _col3, _col4 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: (_col2 < 5.0) (type: boolean) + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col3 (type: double), _col4 (type: double) + auto parallelism: false Reducer 2 Needs Tagging: true Reduce Operator Tree: @@ -1440,31 +1465,6 @@ STAGE PLANS: TotalFiles: 1 GatherStats: false MultiFileSpray: false - Reducer 4 - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), (_col1 + 1) (type: double), (_col1 + 2) (type: double), (_col1 + 3) (type: double) - outputColumnNames: _col0, _col2, _col3, _col4 - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: (_col2 < 5.0) (type: boolean) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col3 (type: double), _col4 (type: double) - auto parallelism: false Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/reduce_deduplicate_exclude_join.q.out ql/src/test/results/clientpositive/spark/reduce_deduplicate_exclude_join.q.out index 54fe587..ffcac72 100644 --- ql/src/test/results/clientpositive/spark/reduce_deduplicate_exclude_join.q.out +++ ql/src/test/results/clientpositive/spark/reduce_deduplicate_exclude_join.q.out @@ -10,8 +10,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 3 (SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -46,6 +46,18 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: Join Operator @@ -70,18 +82,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/semijoin.q.out ql/src/test/results/clientpositive/spark/semijoin.q.out index d802069..e95c916 100644 --- ql/src/test/results/clientpositive/spark/semijoin.q.out +++ ql/src/test/results/clientpositive/spark/semijoin.q.out @@ -2424,16 +2424,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - Map 5 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE Map 6 Map Operator Tree: TableScan @@ -2464,6 +2454,16 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 6 Data size: 49 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) + Map 5 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 11 Data size: 79 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Join Operator diff --git ql/src/test/results/clientpositive/spark/skewjoin_union_remove_1.q.out ql/src/test/results/clientpositive/spark/skewjoin_union_remove_1.q.out index c4a58c1..93b01f7 100644 --- ql/src/test/results/clientpositive/spark/skewjoin_union_remove_1.q.out +++ ql/src/test/results/clientpositive/spark/skewjoin_union_remove_1.q.out @@ -101,6 +101,26 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Map 5 Map Operator Tree: TableScan @@ -129,26 +149,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Reduce Operator Tree: Join Operator @@ -247,6 +247,26 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Map 5 Map Operator Tree: TableScan @@ -275,26 +295,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Reduce Operator Tree: Join Operator @@ -402,6 +402,27 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 Map 5 Map Operator Tree: TableScan @@ -430,27 +451,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 Reducer 6 Reduce Operator Tree: Join Operator @@ -570,6 +570,27 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 Map 5 Map Operator Tree: TableScan @@ -598,27 +619,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 Reducer 6 Reduce Operator Tree: Join Operator diff --git ql/src/test/results/clientpositive/spark/skewjoin_union_remove_2.q.out ql/src/test/results/clientpositive/spark/skewjoin_union_remove_2.q.out index a0fb663..5c06bfc 100644 --- ql/src/test/results/clientpositive/spark/skewjoin_union_remove_2.q.out +++ ql/src/test/results/clientpositive/spark/skewjoin_union_remove_2.q.out @@ -127,6 +127,28 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Map 6 Map Operator Tree: TableScan @@ -169,28 +191,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 7 Reduce Operator Tree: Join Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt1.q.out ql/src/test/results/clientpositive/spark/skewjoinopt1.q.out index d53b073..c3f3044 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt1.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt1.q.out @@ -87,6 +87,26 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Map 5 Map Operator Tree: TableScan @@ -115,26 +135,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Reduce Operator Tree: Join Operator @@ -233,6 +233,26 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Map 5 Map Operator Tree: TableScan @@ -261,26 +281,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Reduce Operator Tree: Join Operator @@ -349,9 +349,9 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Union 3 (GROUP, 1) Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 1), Map 8 (PARTITION-LEVEL SORT, 1) Union 3 <- Reducer 2 (NONE, 0), Reducer 7 (NONE, 0) + Reducer 4 <- Union 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -380,6 +380,24 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Select Operator + Select Operator + SELECT * : (no compute) + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Map 6 Map Operator Tree: TableScan @@ -406,7 +424,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reducer 2 + Reducer 7 Reduce Operator Tree: Join Operator condition map: @@ -424,6 +442,8 @@ STAGE PLANS: Reduce Output Operator sort order: value expressions: _col0 (type: bigint) + Union 3 + Vertex: Union 3 Reducer 4 Reduce Operator Tree: Group By Operator @@ -442,26 +462,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Select Operator - Select Operator - SELECT * : (no compute) - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) - Union 3 - Vertex: Union 3 Stage: Stage-0 Fetch Operator @@ -495,9 +495,9 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Union 3 (GROUP, 1) Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 1), Map 8 (PARTITION-LEVEL SORT, 1) Union 3 <- Reducer 2 (NONE, 0), Reducer 7 (NONE, 0) + Reducer 4 <- Union 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -526,6 +526,24 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 + 1 + Select Operator + Select Operator + SELECT * : (no compute) + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Map 6 Map Operator Tree: TableScan @@ -552,7 +570,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reducer 2 + Reducer 7 Reduce Operator Tree: Join Operator condition map: @@ -570,6 +588,8 @@ STAGE PLANS: Reduce Output Operator sort order: value expressions: _col0 (type: bigint) + Union 3 + Vertex: Union 3 Reducer 4 Reduce Operator Tree: Group By Operator @@ -588,26 +608,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 - 1 - Select Operator - Select Operator - SELECT * : (no compute) - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) - Union 3 - Vertex: Union 3 Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt10.q.out ql/src/test/results/clientpositive/spark/skewjoinopt10.q.out index c68f954..91f25ad 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt10.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt10.q.out @@ -74,19 +74,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Map 4 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: (key is not null and (key = '8')) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Map 6 Map Operator Tree: TableScan @@ -101,20 +88,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: array) - Map 7 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key is not null and (key = '8')) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: array) Reducer 2 Reduce Operator Tree: Join Operator @@ -161,6 +134,33 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (key is not null and (key = '8')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 7 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and (key = '8')) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: array) Reducer 5 Reduce Operator Tree: Join Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt11.q.out ql/src/test/results/clientpositive/spark/skewjoinopt11.q.out index 0b7fd10..c1fb1e7 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt11.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt11.q.out @@ -66,10 +66,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 1), Map 13 (PARTITION-LEVEL SORT, 1) Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) Reducer 9 <- Map 12 (PARTITION-LEVEL SORT, 1), Map 8 (PARTITION-LEVEL SORT, 1) + Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 1), Map 13 (PARTITION-LEVEL SORT, 1) Union 3 <- Reducer 11 (NONE, 0), Reducer 2 (NONE, 0), Reducer 6 (NONE, 0), Reducer 9 (NONE, 0) #### A masked pattern was here #### Vertices: @@ -87,48 +87,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) - Map 10 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: (key is not null and (not (key = '2'))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 12 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: (key is not null and (key = '2')) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Map 13 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: (key is not null and (not (key = '2'))) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) Map 4 Map Operator Tree: TableScan @@ -143,6 +101,29 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Operator + SELECT * : (no compute) + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Map 5 Map Operator Tree: TableScan @@ -171,44 +152,7 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) - Map 8 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: (key is not null and (key = '2')) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) - Reducer 11 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2 - Select Operator - SELECT * : (no compute) - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 2 + Reducer 6 Reduce Operator Tree: Join Operator condition map: @@ -231,7 +175,35 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Map 8 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (key is not null and (key = '2')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 12 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (key is not null and (key = '2')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 9 Reduce Operator Tree: Join Operator condition map: @@ -254,7 +226,35 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 9 + Map 10 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (key is not null and (not (key = '2'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Map 13 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (key is not null and (not (key = '2'))) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) + Reducer 11 Reduce Operator Tree: Join Operator condition map: diff --git ql/src/test/results/clientpositive/spark/skewjoinopt12.q.out ql/src/test/results/clientpositive/spark/skewjoinopt12.q.out index 15ff759..5690b6b 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt12.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt12.q.out @@ -87,6 +87,26 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: string), val (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Map 5 Map Operator Tree: TableScan @@ -113,26 +133,6 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: string), val (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Reduce Operator Tree: Join Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt13.q.out ql/src/test/results/clientpositive/spark/skewjoinopt13.q.out index c4b294b..fc80e35 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt13.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt13.q.out @@ -100,20 +100,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) - Map 4 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: val is not null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: val (type: string) - sort order: + - Map-reduce partition columns: val (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: key (type: string) Map 5 Map Operator Tree: TableScan @@ -144,6 +130,20 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col0 (type: string), _col5 (type: string), _col6 (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: val is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: val (type: string) + sort order: + + Map-reduce partition columns: val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string) Reducer 3 Reduce Operator Tree: Join Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt14.q.out ql/src/test/results/clientpositive/spark/skewjoinopt14.q.out index 9c955bf..04f6a35 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt14.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt14.q.out @@ -87,9 +87,9 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Map 8 (PARTITION-LEVEL SORT, 1), Union 3 (PARTITION-LEVEL SORT, 1) Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 1), Map 9 (PARTITION-LEVEL SORT, 1) Union 3 <- Reducer 2 (NONE, 0), Reducer 7 (NONE, 0) + Reducer 4 <- Map 8 (PARTITION-LEVEL SORT, 1), Union 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -120,6 +120,22 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + SELECT * : (no compute) + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + value expressions: _col0 (type: string), _col5 (type: string), _col6 (type: string) Map 6 Map Operator Tree: TableScan @@ -134,20 +150,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) - Map 8 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: val is not null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: val (type: string) - sort order: + - Map-reduce partition columns: val (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: key (type: string) Map 9 Map Operator Tree: TableScan @@ -162,7 +164,7 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) - Reducer 2 + Reducer 7 Reduce Operator Tree: Join Operator condition map: @@ -178,6 +180,22 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) value expressions: _col0 (type: string), _col5 (type: string), _col6 (type: string) + Union 3 + Vertex: Union 3 + Map 8 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 0 Data size: 20 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: val is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: val (type: string) + sort order: + + Map-reduce partition columns: val (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: key (type: string) Reducer 4 Reduce Operator Tree: Join Operator @@ -199,24 +217,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - SELECT * : (no compute) - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - value expressions: _col0 (type: string), _col5 (type: string), _col6 (type: string) - Union 3 - Vertex: Union 3 Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt15.q.out ql/src/test/results/clientpositive/spark/skewjoinopt15.q.out index b1a0ee8..c634be5 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt15.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt15.q.out @@ -127,6 +127,26 @@ STAGE PLANS: Map-reduce partition columns: key (type: int) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Map 5 Map Operator Tree: TableScan @@ -155,26 +175,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: int) Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Reduce Operator Tree: Join Operator @@ -273,6 +273,26 @@ STAGE PLANS: Map-reduce partition columns: key (type: int) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Map 5 Map Operator Tree: TableScan @@ -301,26 +321,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: int) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Reduce Operator Tree: Join Operator @@ -389,9 +389,9 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Union 3 (GROUP, 1) Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 1), Map 8 (PARTITION-LEVEL SORT, 1) Union 3 <- Reducer 2 (NONE, 0), Reducer 7 (NONE, 0) + Reducer 4 <- Union 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -420,6 +420,24 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: int) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Select Operator + Select Operator + SELECT * : (no compute) + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Map 6 Map Operator Tree: TableScan @@ -446,7 +464,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: int) Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 7 Reduce Operator Tree: Join Operator condition map: @@ -464,6 +482,8 @@ STAGE PLANS: Reduce Output Operator sort order: value expressions: _col0 (type: bigint) + Union 3 + Vertex: Union 3 Reducer 4 Reduce Operator Tree: Group By Operator @@ -482,26 +502,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Select Operator - Select Operator - SELECT * : (no compute) - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) - Union 3 - Vertex: Union 3 Stage: Stage-0 Fetch Operator @@ -535,9 +535,9 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Union 3 (GROUP, 1) Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 1), Map 8 (PARTITION-LEVEL SORT, 1) Union 3 <- Reducer 2 (NONE, 0), Reducer 7 (NONE, 0) + Reducer 4 <- Union 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -566,6 +566,24 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: int) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + condition expressions: + 0 + 1 + Select Operator + Select Operator + SELECT * : (no compute) + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Map 6 Map Operator Tree: TableScan @@ -592,7 +610,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: key (type: int) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 7 Reduce Operator Tree: Join Operator condition map: @@ -610,6 +628,8 @@ STAGE PLANS: Reduce Output Operator sort order: value expressions: _col0 (type: bigint) + Union 3 + Vertex: Union 3 Reducer 4 Reduce Operator Tree: Group By Operator @@ -628,26 +648,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - condition expressions: - 0 - 1 - Select Operator - Select Operator - SELECT * : (no compute) - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) - Union 3 - Vertex: Union 3 Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt16.q.out ql/src/test/results/clientpositive/spark/skewjoinopt16.q.out index 07c17f4..7775a49 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt16.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt16.q.out @@ -87,6 +87,26 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: string), val (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Map 5 Map Operator Tree: TableScan @@ -113,26 +133,6 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: string), val (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Reduce Operator Tree: Join Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt17.q.out ql/src/test/results/clientpositive/spark/skewjoinopt17.q.out index e4e7169..635bfe9 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt17.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt17.q.out @@ -93,6 +93,26 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Map 5 Map Operator Tree: TableScan @@ -121,26 +141,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Reduce Operator Tree: Join Operator @@ -293,6 +293,26 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: string), val (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Map 5 Map Operator Tree: TableScan @@ -319,26 +339,6 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: string), val (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Reduce Operator Tree: Join Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt19.q.out ql/src/test/results/clientpositive/spark/skewjoinopt19.q.out index d599cd5..0accb3b 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt19.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt19.q.out @@ -91,6 +91,26 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Map 5 Map Operator Tree: TableScan @@ -119,26 +139,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Reduce Operator Tree: Join Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt2.q.out ql/src/test/results/clientpositive/spark/skewjoinopt2.q.out index ac1d6c6..d555c80 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt2.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt2.q.out @@ -95,6 +95,26 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: string), val (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Map 5 Map Operator Tree: TableScan @@ -121,26 +141,6 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: string), val (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Reduce Operator Tree: Join Operator @@ -234,6 +234,26 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: string), val (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Map 5 Map Operator Tree: TableScan @@ -260,26 +280,6 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: string), val (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Reduce Operator Tree: Join Operator @@ -347,9 +347,9 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Union 3 (GROUP, 1) Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 1), Map 8 (PARTITION-LEVEL SORT, 1) Union 3 <- Reducer 2 (NONE, 0), Reducer 7 (NONE, 0) + Reducer 4 <- Union 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -378,6 +378,30 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: string), val (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Select Operator + SELECT * : (no compute) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: bigint) Map 6 Map Operator Tree: TableScan @@ -404,7 +428,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: string), val (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reducer 2 + Reducer 7 Reduce Operator Tree: Join Operator condition map: @@ -428,6 +452,8 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) value expressions: _col1 (type: bigint) + Union 3 + Vertex: Union 3 Reducer 4 Reduce Operator Tree: Group By Operator @@ -447,32 +473,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 - outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Select Operator - SELECT * : (no compute) - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - value expressions: _col1 (type: bigint) - Union 3 - Vertex: Union 3 Stage: Stage-0 Fetch Operator @@ -507,9 +507,9 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Union 3 (GROUP, 1) Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 1), Map 8 (PARTITION-LEVEL SORT, 1) Union 3 <- Reducer 2 (NONE, 0), Reducer 7 (NONE, 0) + Reducer 4 <- Union 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -538,6 +538,30 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: string), val (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Select Operator + SELECT * : (no compute) + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: bigint) Map 6 Map Operator Tree: TableScan @@ -564,7 +588,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: string), val (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reducer 2 + Reducer 7 Reduce Operator Tree: Join Operator condition map: @@ -588,6 +612,8 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) value expressions: _col1 (type: bigint) + Union 3 + Vertex: Union 3 Reducer 4 Reduce Operator Tree: Group By Operator @@ -607,32 +633,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 - outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Select Operator - SELECT * : (no compute) - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - value expressions: _col1 (type: bigint) - Union 3 - Vertex: Union 3 Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt20.q.out ql/src/test/results/clientpositive/spark/skewjoinopt20.q.out index 2ed3634..d71d436 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt20.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt20.q.out @@ -91,6 +91,26 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Map 5 Map Operator Tree: TableScan @@ -119,26 +139,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Reduce Operator Tree: Join Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt3.q.out ql/src/test/results/clientpositive/spark/skewjoinopt3.q.out index 93cdbb8..8957705 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt3.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt3.q.out @@ -91,6 +91,26 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Map 5 Map Operator Tree: TableScan @@ -119,26 +139,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Reduce Operator Tree: Join Operator @@ -237,6 +237,26 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Outer Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Map 5 Map Operator Tree: TableScan @@ -265,26 +285,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Outer Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Reduce Operator Tree: Join Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt4.q.out ql/src/test/results/clientpositive/spark/skewjoinopt4.q.out index 1df961d..5ae1d76 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt4.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt4.q.out @@ -87,6 +87,26 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Map 5 Map Operator Tree: TableScan @@ -115,26 +135,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Reduce Operator Tree: Join Operator @@ -231,6 +231,26 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Map 5 Map Operator Tree: TableScan @@ -259,26 +279,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Reduce Operator Tree: Join Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt5.q.out ql/src/test/results/clientpositive/spark/skewjoinopt5.q.out index 7712c0e..0e68eeb 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt5.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt5.q.out @@ -89,6 +89,26 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Map 5 Map Operator Tree: TableScan @@ -117,26 +137,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Reduce Operator Tree: Join Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt6.q.out ql/src/test/results/clientpositive/spark/skewjoinopt6.q.out index bdab7de..5f58abc 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt6.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt6.q.out @@ -91,6 +91,26 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Map 5 Map Operator Tree: TableScan @@ -119,26 +139,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Reduce Operator Tree: Join Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt7.q.out ql/src/test/results/clientpositive/spark/skewjoinopt7.q.out index 3d46cf2..e641a57 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt7.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt7.q.out @@ -121,6 +121,28 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Map 6 Map Operator Tree: TableScan @@ -163,28 +185,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 7 Reduce Operator Tree: Join Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt8.q.out ql/src/test/results/clientpositive/spark/skewjoinopt8.q.out index 76c674c..9c3a14f 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt8.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt8.q.out @@ -119,6 +119,28 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + 2 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Operator + SELECT * : (no compute) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Map 6 Map Operator Tree: TableScan @@ -161,28 +183,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: val (type: string) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {KEY.reducesinkkey0} {VALUE._col0} - 2 {KEY.reducesinkkey0} {VALUE._col0} - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Operator - SELECT * : (no compute) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 7 Reduce Operator Tree: Join Operator diff --git ql/src/test/results/clientpositive/spark/skewjoinopt9.q.out ql/src/test/results/clientpositive/spark/skewjoinopt9.q.out index 5e2da76..e855ac3 100644 --- ql/src/test/results/clientpositive/spark/skewjoinopt9.q.out +++ ql/src/test/results/clientpositive/spark/skewjoinopt9.q.out @@ -64,8 +64,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Union 4 (PARTITION-LEVEL SORT, 1) Union 4 <- Map 3 (NONE, 0), Map 5 (NONE, 0) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Union 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -110,6 +110,8 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) value expressions: _col1 (type: string) + Union 4 + Vertex: Union 4 Reducer 2 Reduce Operator Tree: Join Operator @@ -131,8 +133,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 4 - Vertex: Union 4 Stage: Stage-0 Fetch Operator @@ -230,20 +230,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: bigint) - Map 4 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string) - sort order: + - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: val (type: string) Reducer 2 Reduce Operator Tree: Group By Operator @@ -262,6 +248,20 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: bigint) + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: val (type: string) Reducer 3 Reduce Operator Tree: Join Operator diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_25.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_25.q.out index 4fda89b..493aa10 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_25.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_25.q.out @@ -61,8 +61,8 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -89,6 +89,24 @@ STAGE PLANS: key expressions: 5 (type: int) sort order: + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 28 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 5 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 28 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 28 Data size: 114 Basic stats: COMPLETE Column stats: NONE Map 5 Map Operator Tree: TableScan @@ -113,7 +131,7 @@ STAGE PLANS: key expressions: 5 (type: int) sort order: + Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 6 Reduce Operator Tree: Join Operator condition map: @@ -121,16 +139,16 @@ STAGE PLANS: condition expressions: 0 1 - Statistics: Num rows: 28 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 29 Data size: 118 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 5 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 28 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 29 Data size: 118 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 28 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 29 Data size: 118 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Join Operator @@ -155,24 +173,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 - 1 - Statistics: Num rows: 29 Data size: 118 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: 5 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 29 Data size: 118 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 29 Data size: 118 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/stats12.q.out ql/src/test/results/clientpositive/spark/stats12.q.out index 8c85371..88613e6 100644 --- ql/src/test/results/clientpositive/spark/stats12.q.out +++ ql/src/test/results/clientpositive/spark/stats12.q.out @@ -74,12 +74,12 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: hr=12 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 - hr 11 + hr 12 properties: COLUMN_STATS_ACCURATE false bucket_count -1 @@ -120,12 +120,12 @@ STAGE PLANS: name: default.analyze_srcpart #### A masked pattern was here #### Partition - base file name: hr=12 + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 - hr 12 + hr 11 properties: COLUMN_STATS_ACCURATE false bucket_count -1 @@ -165,8 +165,8 @@ STAGE PLANS: name: default.analyze_srcpart name: default.analyze_srcpart Truncated Path -> Alias: - /analyze_srcpart/ds=2008-04-08/hr=11 [analyze_srcpart] /analyze_srcpart/ds=2008-04-08/hr=12 [analyze_srcpart] + /analyze_srcpart/ds=2008-04-08/hr=11 [analyze_srcpart] Stage: Stage-2 Stats-Aggr Operator diff --git ql/src/test/results/clientpositive/spark/subquery_in.q.out ql/src/test/results/clientpositive/spark/subquery_in.q.out index 0856b37..dc90380 100644 --- ql/src/test/results/clientpositive/spark/subquery_in.q.out +++ ql/src/test/results/clientpositive/spark/subquery_in.q.out @@ -281,20 +281,6 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: p_mfgr (type: string), p_size (type: int) - Map 5 - Map Operator Tree: - TableScan - alias: part - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: UDFToDouble(p_size) is not null (type: boolean) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(p_size) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(p_size) (type: double) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: p_name (type: string), p_size (type: int) Reducer 2 Reduce Operator Tree: Extract @@ -341,6 +327,20 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: part + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToDouble(p_size) is not null (type: boolean) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(p_size) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(p_size) (type: double) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: p_name (type: string), p_size (type: int) Reducer 4 Reduce Operator Tree: Join Operator @@ -417,9 +417,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1) Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -447,27 +447,6 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: p_mfgr (type: string), p_size (type: int) - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {VALUE._col1} {KEY.reducesinkkey1} {KEY.reducesinkkey0} - 1 - outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Reduce Operator Tree: Extract @@ -518,6 +497,27 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: string) Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {VALUE._col1} {KEY.reducesinkkey1} {KEY.reducesinkkey0} + 1 + outputColumnNames: _col1, _col2, _col5 + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 931 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -577,8 +577,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 3 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -616,27 +616,6 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Reduce Operator Tree: Group By Operator @@ -658,6 +637,27 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -740,9 +740,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: + Reducer 5 <- Map 4 (GROUP, 1) Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) - Reducer 5 <- Map 4 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -781,6 +781,38 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Reducer 5 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col0} {VALUE._col1} + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col3 (type: int) Map 6 Map Operator Tree: TableScan @@ -803,22 +835,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col0} {VALUE._col1} - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col3 (type: int) Reducer 3 Reduce Operator Tree: Join Operator @@ -840,22 +856,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out index 85939c0..78404a6 100644 --- ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out +++ ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out @@ -68,27 +68,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 9 (PARTITION-LEVEL SORT, 1) + Reducer 5 <- Map 11 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1) + Reducer 9 <- Map 8 (GROUP, 1) + Reducer 2 <- Map 10 (PARTITION-LEVEL SORT, 1), Reducer 9 (PARTITION-LEVEL SORT, 1) Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Reducer 3 (SORT, 1) - Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1) - Reducer 9 <- Map 8 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string), value (type: string) - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map 6 Map Operator Tree: TableScan @@ -111,23 +97,38 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Map 7 + Map 11 Map Operator Tree: TableScan - alias: s1 + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > '2') (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_4 Map 8 Map Operator Tree: TableScan @@ -147,6 +148,37 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Reducer 9 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 = 0) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: 0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 10 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reducer 2 Reduce Operator Tree: Join Operator @@ -163,6 +195,23 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > '2') (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Join Operator @@ -199,50 +248,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_5 - Reducer 5 - Reduce Operator Tree: - Join Operator - condition map: - Left Semi Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_4 - Reducer 9 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: 0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - keys: _col0 (type: bigint) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Stage: Stage-3 Dependency Collection @@ -501,66 +506,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 9 (PARTITION-LEVEL SORT, 1) + Reducer 9 <- Map 8 (GROUP, 1) + Reducer 2 <- Map 10 (PARTITION-LEVEL SORT, 1), Reducer 9 (PARTITION-LEVEL SORT, 1) Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Reducer 3 (SORT, 1) - Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1) - Reducer 9 <- Map 8 (GROUP, 1) + Reducer 5 <- Map 11 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string), value (type: string) - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string), value (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 6 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > '9') and value is not null) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Map 7 - Map Operator Tree: - TableScan - alias: s1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key > '2') (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Map 8 Map Operator Tree: TableScan @@ -580,6 +532,37 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Reducer 9 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 = 0) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: 0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Map 10 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reducer 2 Reduce Operator Tree: Join Operator @@ -596,6 +579,23 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key > '2') (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Join Operator @@ -632,6 +632,38 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_5 + Map 6 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > '9') and value is not null) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Map 11 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 5 Reduce Operator Tree: Join Operator @@ -654,28 +686,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_4 - Reducer 9 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: 0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - keys: _col0 (type: bigint) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Stage: Stage-3 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/temp_table.q.out ql/src/test/results/clientpositive/spark/temp_table.q.out index e75fa0e..e8b423d 100644 --- ql/src/test/results/clientpositive/spark/temp_table.q.out +++ ql/src/test/results/clientpositive/spark/temp_table.q.out @@ -208,8 +208,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Union 2 (SORT, 1) Union 2 <- Map 1 (NONE, 0), Map 4 (NONE, 0) + Reducer 3 <- Union 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -240,6 +240,8 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + value expressions: _col1 (type: string) + Union 2 + Vertex: Union 2 Reducer 3 Reduce Operator Tree: Select Operator @@ -256,8 +258,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 2 - Vertex: Union 2 Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/tez_join_tests.q.out ql/src/test/results/clientpositive/spark/tez_join_tests.q.out index 14406df..e3bf788 100644 --- ql/src/test/results/clientpositive/spark/tez_join_tests.q.out +++ ql/src/test/results/clientpositive/spark/tez_join_tests.q.out @@ -29,17 +29,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) - Map 6 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string) Map 7 Map Operator Tree: TableScan @@ -81,6 +70,17 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reducer 4 Reduce Operator Tree: Join Operator diff --git ql/src/test/results/clientpositive/spark/tez_joins_explain.q.out ql/src/test/results/clientpositive/spark/tez_joins_explain.q.out index 3f080a6..1296fca 100644 --- ql/src/test/results/clientpositive/spark/tez_joins_explain.q.out +++ ql/src/test/results/clientpositive/spark/tez_joins_explain.q.out @@ -29,17 +29,6 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) - Map 6 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string) - sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string) Map 7 Map Operator Tree: TableScan @@ -81,6 +70,17 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) Reducer 4 Reduce Operator Tree: Join Operator diff --git ql/src/test/results/clientpositive/spark/union10.q.out ql/src/test/results/clientpositive/spark/union10.q.out index 1e58d04..ed9978b 100644 --- ql/src/test/results/clientpositive/spark/union10.q.out +++ ql/src/test/results/clientpositive/spark/union10.q.out @@ -58,38 +58,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Map 4 - Map Operator Tree: - TableScan - alias: s2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Map 6 - Map Operator Tree: - TableScan - alias: s3 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -109,6 +77,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Map 4 + Map Operator Tree: + TableScan + alias: s2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reducer 5 Reduce Operator Tree: Group By Operator @@ -128,6 +112,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Map 6 + Map Operator Tree: + TableScan + alias: s3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reducer 7 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/union18.q.out ql/src/test/results/clientpositive/spark/union18.q.out index 617a005..fe776cc 100644 --- ql/src/test/results/clientpositive/spark/union18.q.out +++ ql/src/test/results/clientpositive/spark/union18.q.out @@ -64,33 +64,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Map 4 - Map Operator Tree: - TableScan - alias: s2 - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest1 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 Reducer 2 Reduce Operator Tree: Group By Operator @@ -120,6 +93,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Map 4 + Map Operator Tree: + TableScan + alias: s2 + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 Union 3 Vertex: Union 3 diff --git ql/src/test/results/clientpositive/spark/union19.q.out ql/src/test/results/clientpositive/spark/union19.q.out index c32fd33..9f23230 100644 --- ql/src/test/results/clientpositive/spark/union19.q.out +++ ql/src/test/results/clientpositive/spark/union19.q.out @@ -45,8 +45,8 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 1) - Reducer 4 <- Union 3 (GROUP, 1) Union 3 <- Map 5 (NONE, 0), Reducer 2 (NONE, 0) + Reducer 4 <- Union 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -65,36 +65,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Map 5 - Map Operator Tree: - TableScan - alias: s2 - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - value expressions: _col1 (type: bigint) - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 Reducer 2 Reduce Operator Tree: Group By Operator @@ -127,6 +97,38 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Map 5 + Map Operator Tree: + TableScan + alias: s2 + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Group By Operator + aggregations: count(_col1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: bigint) + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + Union 3 + Vertex: Union 3 Reducer 4 Reduce Operator Tree: Group By Operator @@ -147,8 +149,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 - Union 3 - Vertex: Union 3 Stage: Stage-3 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/union23.q.out ql/src/test/results/clientpositive/spark/union23.q.out index e61b324..0e781ae 100644 --- ql/src/test/results/clientpositive/spark/union23.q.out +++ ql/src/test/results/clientpositive/spark/union23.q.out @@ -24,8 +24,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Union 2 (SORT, 1) Union 2 <- Map 1 (NONE, 0), Map 4 (NONE, 0) + Reducer 3 <- Union 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -60,6 +60,8 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ + Union 2 + Vertex: Union 2 Reducer 3 Reduce Operator Tree: Select Operator @@ -73,8 +75,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Union 2 - Vertex: Union 2 Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/union25.q.out ql/src/test/results/clientpositive/spark/union25.q.out index 4a36b29..854e212 100644 --- ql/src/test/results/clientpositive/spark/union25.q.out +++ ql/src/test/results/clientpositive/spark/union25.q.out @@ -67,10 +67,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Union 2 (GROUP, 1) - Reducer 5 <- Union 4 (GROUP, 1) Union 2 <- Map 1 (NONE, 0), Map 6 (NONE, 0) + Reducer 3 <- Union 2 (GROUP, 1) Union 4 <- Map 7 (NONE, 0), Reducer 3 (NONE, 0) + Reducer 5 <- Union 4 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -109,26 +109,8 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Map 7 - Map Operator Tree: - TableScan - alias: a - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Group By Operator - aggregations: count(1) - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - value expressions: _col2 (type: bigint) + Union 2 + Vertex: Union 2 Reducer 3 Reduce Operator Tree: Group By Operator @@ -151,6 +133,28 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) value expressions: _col2 (type: bigint) + Map 7 + Map Operator Tree: + TableScan + alias: a + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + value expressions: _col2 (type: bigint) + Union 4 + Vertex: Union 4 Reducer 5 Reduce Operator Tree: Group By Operator @@ -171,10 +175,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmp_unionall - Union 2 - Vertex: Union 2 - Union 4 - Vertex: Union 4 Stage: Stage-2 Dependency Collection diff --git ql/src/test/results/clientpositive/spark/union28.q.out ql/src/test/results/clientpositive/spark/union28.q.out index 8d1beae..adfa658 100644 --- ql/src/test/results/clientpositive/spark/union28.q.out +++ ql/src/test/results/clientpositive/spark/union28.q.out @@ -85,27 +85,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) - Map 5 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string), value (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) Reducer 4 Reduce Operator Tree: Group By Operator @@ -129,6 +108,27 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reducer 6 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/union30.q.out ql/src/test/results/clientpositive/spark/union30.q.out index fa2df51..2cc0f8b 100644 --- ql/src/test/results/clientpositive/spark/union30.q.out +++ ql/src/test/results/clientpositive/spark/union30.q.out @@ -82,6 +82,29 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.union_subq_union Map 4 Map Operator Tree: TableScan @@ -103,12 +126,18 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) - Map 6 - Map Operator Tree: - TableScan - alias: src + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 Select Operator - expressions: key (type: string), value (type: string) + expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 Select Operator expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) @@ -120,7 +149,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union - Map 7 + Map 6 Map Operator Tree: TableScan alias: src @@ -137,41 +166,12 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.union_subq_union - Reducer 5 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + Map 7 + Map Operator Tree: + TableScan + alias: src Select Operator - expressions: _col0 (type: string), _col1 (type: string) + expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Select Operator expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) diff --git ql/src/test/results/clientpositive/spark/union4.q.out ql/src/test/results/clientpositive/spark/union4.q.out index ea232f4..533be3b 100644 --- ql/src/test/results/clientpositive/spark/union4.q.out +++ ql/src/test/results/clientpositive/spark/union4.q.out @@ -55,22 +55,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Map 4 - Map Operator Tree: - TableScan - alias: s2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -90,6 +74,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Map 4 + Map Operator Tree: + TableScan + alias: s2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Reducer 5 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/union6.q.out ql/src/test/results/clientpositive/spark/union6.q.out index c166e1b..5eb0935 100644 --- ql/src/test/results/clientpositive/spark/union6.q.out +++ ql/src/test/results/clientpositive/spark/union6.q.out @@ -54,23 +54,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Map 4 - Map Operator Tree: - TableScan - alias: s2 - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.tmptable Reducer 2 Reduce Operator Tree: Group By Operator @@ -90,6 +73,23 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Map 4 + Map Operator Tree: + TableScan + alias: s2 + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tmptable Union 3 Vertex: Union 3 diff --git ql/src/test/results/clientpositive/spark/union_ppr.q.out ql/src/test/results/clientpositive/spark/union_ppr.q.out index 7611f3b..ba1dcde 100644 --- ql/src/test/results/clientpositive/spark/union_ppr.q.out +++ ql/src/test/results/clientpositive/spark/union_ppr.q.out @@ -112,8 +112,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Union 2 (SORT, 1) Union 2 <- Map 1 (NONE, 0), Map 4 (NONE, 0) + Reducer 3 <- Union 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -350,6 +350,8 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [y] /srcpart/ds=2008-04-08/hr=12 [y] + Union 2 + Vertex: Union 2 Reducer 3 Needs Tagging: false Reduce Operator Tree: @@ -378,8 +380,6 @@ STAGE PLANS: TotalFiles: 1 GatherStats: false MultiFileSpray: false - Union 2 - Vertex: Union 2 Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/union_remove_1.q.out ql/src/test/results/clientpositive/spark/union_remove_1.q.out index 85ec617..ac5eb74 100644 --- ql/src/test/results/clientpositive/spark/union_remove_1.q.out +++ ql/src/test/results/clientpositive/spark/union_remove_1.q.out @@ -95,6 +95,26 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 Map 4 Map Operator Tree: TableScan @@ -116,26 +136,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 Reducer 5 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/union_remove_10.q.out ql/src/test/results/clientpositive/spark/union_remove_10.q.out index aa49521..dcb737b 100644 --- ql/src/test/results/clientpositive/spark/union_remove_10.q.out +++ ql/src/test/results/clientpositive/spark/union_remove_10.q.out @@ -115,6 +115,26 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.outputtbl1 Map 4 Map Operator Tree: TableScan @@ -149,26 +169,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.outputtbl1 - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.outputtbl1 Union 3 Vertex: Union 3 diff --git ql/src/test/results/clientpositive/spark/union_remove_15.q.out ql/src/test/results/clientpositive/spark/union_remove_15.q.out index fe2dcd1..5d62c1e 100644 --- ql/src/test/results/clientpositive/spark/union_remove_15.q.out +++ ql/src/test/results/clientpositive/spark/union_remove_15.q.out @@ -101,6 +101,26 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), '2' (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.outputtbl1 Map 4 Map Operator Tree: TableScan @@ -122,26 +142,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), '2' (type: string) - outputColumnNames: _col0, _col1, _col2 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.outputtbl1 Reducer 5 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/union_remove_16.q.out ql/src/test/results/clientpositive/spark/union_remove_16.q.out index 6902aeb..1252b56 100644 --- ql/src/test/results/clientpositive/spark/union_remove_16.q.out +++ ql/src/test/results/clientpositive/spark/union_remove_16.q.out @@ -104,6 +104,26 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), '2' (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.outputtbl1 Map 4 Map Operator Tree: TableScan @@ -125,26 +145,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), '2' (type: string) - outputColumnNames: _col0, _col1, _col2 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.outputtbl1 Reducer 5 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/union_remove_18.q.out ql/src/test/results/clientpositive/spark/union_remove_18.q.out index b901363..30b253f 100644 --- ql/src/test/results/clientpositive/spark/union_remove_18.q.out +++ ql/src/test/results/clientpositive/spark/union_remove_18.q.out @@ -99,6 +99,26 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col2 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col0 (type: string), _col2 (type: bigint), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 Map 4 Map Operator Tree: TableScan @@ -120,26 +140,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col2 (type: bigint) - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Select Operator - expressions: _col0 (type: string), _col2 (type: bigint), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 Reducer 5 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/union_remove_19.q.out ql/src/test/results/clientpositive/spark/union_remove_19.q.out index 48120ec..6b97973 100644 --- ql/src/test/results/clientpositive/spark/union_remove_19.q.out +++ ql/src/test/results/clientpositive/spark/union_remove_19.q.out @@ -99,6 +99,26 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 Map 4 Map Operator Tree: TableScan @@ -120,26 +140,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 Reducer 5 Reduce Operator Tree: Group By Operator @@ -310,6 +310,26 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 15 Data size: 15 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col1 + Select Operator + expressions: '7' (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 Map 4 Map Operator Tree: TableScan @@ -334,26 +354,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 15 Data size: 15 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col1 (type: bigint) - outputColumnNames: _col1 - Select Operator - expressions: '7' (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 Reducer 5 Reduce Operator Tree: Group By Operator @@ -483,27 +483,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: bigint) - Map 4 - Map Operator Tree: - TableScan - alias: inputtbl1 - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col1 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -529,6 +508,27 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 + Map 4 + Map Operator Tree: + TableScan + alias: inputtbl1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 5 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/union_remove_2.q.out ql/src/test/results/clientpositive/spark/union_remove_2.q.out index 8e58c46..1c3838f 100644 --- ql/src/test/results/clientpositive/spark/union_remove_2.q.out +++ ql/src/test/results/clientpositive/spark/union_remove_2.q.out @@ -117,23 +117,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: bigint) - Map 5 - Map Operator Tree: - TableScan - alias: inputtbl1 - Select Operator - expressions: key (type: string), UDFToLong(1) (type: bigint) - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 Reducer 4 Reduce Operator Tree: Group By Operator @@ -154,6 +137,23 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 + Map 5 + Map Operator Tree: + TableScan + alias: inputtbl1 + Select Operator + expressions: key (type: string), UDFToLong(1) (type: bigint) + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 Union 2 Vertex: Union 2 diff --git ql/src/test/results/clientpositive/spark/union_remove_20.q.out ql/src/test/results/clientpositive/spark/union_remove_20.q.out index 13085a4..17c40b5 100644 --- ql/src/test/results/clientpositive/spark/union_remove_20.q.out +++ ql/src/test/results/clientpositive/spark/union_remove_20.q.out @@ -97,6 +97,26 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col1 (type: bigint), _col0 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 Map 4 Map Operator Tree: TableScan @@ -118,26 +138,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col1 (type: bigint), _col0 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 Reducer 5 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/union_remove_21.q.out ql/src/test/results/clientpositive/spark/union_remove_21.q.out index 2cc70c5..50f26e3 100644 --- ql/src/test/results/clientpositive/spark/union_remove_21.q.out +++ ql/src/test/results/clientpositive/spark/union_remove_21.q.out @@ -97,6 +97,26 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 Map 4 Map Operator Tree: TableScan @@ -118,26 +138,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 Reducer 5 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/union_remove_24.q.out ql/src/test/results/clientpositive/spark/union_remove_24.q.out index 5913610..c098832 100644 --- ql/src/test/results/clientpositive/spark/union_remove_24.q.out +++ ql/src/test/results/clientpositive/spark/union_remove_24.q.out @@ -93,6 +93,26 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: UDFToDouble(UDFToLong(_col0)) (type: double), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: double), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 Map 4 Map Operator Tree: TableScan @@ -114,26 +134,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Select Operator - expressions: UDFToDouble(UDFToLong(_col0)) (type: double), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: double), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 Reducer 5 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/union_remove_25.q.out ql/src/test/results/clientpositive/spark/union_remove_25.q.out index 58f764e..d4474a1 100644 --- ql/src/test/results/clientpositive/spark/union_remove_25.q.out +++ ql/src/test/results/clientpositive/spark/union_remove_25.q.out @@ -111,6 +111,26 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 Map 4 Map Operator Tree: TableScan @@ -132,26 +152,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 Reducer 5 Reduce Operator Tree: Group By Operator @@ -321,22 +321,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 500 Data size: 5000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string) - Map 4 - Map Operator Tree: - TableScan - alias: srcpart - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 500 - Statistics: Num rows: 500 Data size: 5000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 500 Data size: 5000 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) Reducer 2 Reduce Operator Tree: Select Operator @@ -354,6 +338,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl2 + Map 4 + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 500 + Statistics: Num rows: 500 Data size: 5000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 5000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) Reducer 5 Reduce Operator Tree: Select Operator @@ -509,22 +509,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1000 Data size: 10000 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) - Map 4 - Map Operator Tree: - TableScan - alias: srcpart - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 1000 - Statistics: Num rows: 1000 Data size: 10000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1000 Data size: 10000 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) Reducer 2 Reduce Operator Tree: Select Operator @@ -542,6 +526,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl3 + Map 4 + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1000 + Statistics: Num rows: 1000 Data size: 10000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1000 Data size: 10000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) Reducer 5 Reduce Operator Tree: Select Operator diff --git ql/src/test/results/clientpositive/spark/union_remove_4.q.out ql/src/test/results/clientpositive/spark/union_remove_4.q.out index 909a924..f8a8d94 100644 --- ql/src/test/results/clientpositive/spark/union_remove_4.q.out +++ ql/src/test/results/clientpositive/spark/union_remove_4.q.out @@ -100,6 +100,26 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 Map 4 Map Operator Tree: TableScan @@ -121,26 +141,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 Reducer 5 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/union_remove_5.q.out ql/src/test/results/clientpositive/spark/union_remove_5.q.out index fb24181..a86f036 100644 --- ql/src/test/results/clientpositive/spark/union_remove_5.q.out +++ ql/src/test/results/clientpositive/spark/union_remove_5.q.out @@ -124,23 +124,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: bigint) - Map 5 - Map Operator Tree: - TableScan - alias: inputtbl1 - Select Operator - expressions: key (type: string), UDFToLong(1) (type: bigint) - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 Reducer 4 Reduce Operator Tree: Group By Operator @@ -161,6 +144,23 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 + Map 5 + Map Operator Tree: + TableScan + alias: inputtbl1 + Select Operator + expressions: key (type: string), UDFToLong(1) (type: bigint) + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 Union 2 Vertex: Union 2 diff --git ql/src/test/results/clientpositive/spark/union_remove_6.q.out ql/src/test/results/clientpositive/spark/union_remove_6.q.out index 73f038b..54fa8ec 100644 --- ql/src/test/results/clientpositive/spark/union_remove_6.q.out +++ ql/src/test/results/clientpositive/spark/union_remove_6.q.out @@ -98,27 +98,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: bigint) - Map 4 - Map Operator Tree: - TableScan - alias: inputtbl1 - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: key - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: count(1) - keys: key (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - value expressions: _col1 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -149,6 +128,27 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl2 + Map 4 + Map Operator Tree: + TableScan + alias: inputtbl1 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 5 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/union_remove_7.q.out ql/src/test/results/clientpositive/spark/union_remove_7.q.out index 49ec685..c79797c 100644 --- ql/src/test/results/clientpositive/spark/union_remove_7.q.out +++ ql/src/test/results/clientpositive/spark/union_remove_7.q.out @@ -99,6 +99,26 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.outputtbl1 Map 4 Map Operator Tree: TableScan @@ -120,26 +140,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.outputtbl1 Reducer 5 Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/spark/union_remove_8.q.out ql/src/test/results/clientpositive/spark/union_remove_8.q.out index e98957d..c1c319c 100644 --- ql/src/test/results/clientpositive/spark/union_remove_8.q.out +++ ql/src/test/results/clientpositive/spark/union_remove_8.q.out @@ -121,23 +121,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: bigint) - Map 5 - Map Operator Tree: - TableScan - alias: inputtbl1 - Select Operator - expressions: key (type: string), UDFToLong(1) (type: bigint) - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.outputtbl1 Reducer 4 Reduce Operator Tree: Group By Operator @@ -158,6 +141,23 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.outputtbl1 + Map 5 + Map Operator Tree: + TableScan + alias: inputtbl1 + Select Operator + expressions: key (type: string), UDFToLong(1) (type: bigint) + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.outputtbl1 Union 2 Vertex: Union 2 diff --git ql/src/test/results/clientpositive/spark/union_remove_9.q.out ql/src/test/results/clientpositive/spark/union_remove_9.q.out index 66fe4e9..4184395 100644 --- ql/src/test/results/clientpositive/spark/union_remove_9.q.out +++ ql/src/test/results/clientpositive/spark/union_remove_9.q.out @@ -131,26 +131,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE value expressions: _col1 (type: bigint) - Map 5 - Map Operator Tree: - TableScan - alias: inputtbl1 - Select Operator - expressions: key (type: string), 2 (type: int) - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint) - outputColumnNames: _col0, _col1 - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.outputtbl1 Reducer 4 Reduce Operator Tree: Group By Operator @@ -171,6 +151,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.outputtbl1 + Map 5 + Map Operator Tree: + TableScan + alias: inputtbl1 + Select Operator + expressions: key (type: string), 2 (type: int) + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), UDFToLong(_col1) (type: bigint) + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.outputtbl1 Union 2 Vertex: Union 2 diff --git ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out index d505232..adaf307 100644 --- ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out +++ ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out @@ -24,9 +24,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1) Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -63,6 +63,21 @@ STAGE PLANS: Map-reduce partition columns: cint (type: int) Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 13516 Data size: 414960 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 13516 Data size: 414960 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -102,21 +117,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized - Reducer 5 - Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {VALUE._col0} - 1 - outputColumnNames: _col0 - Statistics: Num rows: 13516 Data size: 414960 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: tinyint) - sort order: + - Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 13516 Data size: 414960 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out index 0cdb7c8..dd5d048 100644 --- ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out +++ ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out @@ -30,9 +30,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: + Reducer 5 <- Map 4 (GROUP, 1) Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) - Reducer 5 <- Map 4 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -71,6 +71,39 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Reducer 5 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col0} {VALUE._col1} + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col3 (type: int) Map 6 Map Operator Tree: TableScan @@ -93,22 +126,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col0} {VALUE._col1} - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col3 (type: int) Reducer 3 Reduce Operator Tree: Join Operator @@ -130,23 +147,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -200,9 +200,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: + Reducer 5 <- Map 4 (GROUP, 1) Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) - Reducer 5 <- Map 4 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -241,6 +241,39 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE + Reducer 5 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 {VALUE._col0} {VALUE._col1} + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int), 1 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col3 (type: int) Map 6 Map Operator Tree: TableScan @@ -263,22 +296,6 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Statistics: Num rows: 6 Data size: 719 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} - 1 {VALUE._col0} {VALUE._col1} - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int), 1 (type: int) - sort order: ++ - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 27 Data size: 3298 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col3 (type: int) Reducer 3 Reduce Operator Tree: Join Operator @@ -300,23 +317,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out index 4d4dd94..e3e2da1 100644 --- ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out @@ -10,9 +10,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1) Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) Reducer 3 <- Reducer 2 (GROUP, 1) - Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -58,6 +58,29 @@ STAGE PLANS: Map-reduce partition columns: ctinyint (type: tinyint) Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col4} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col1, _col5, _col15 + Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 = _col15) (type: boolean) + Statistics: Num rows: 3379 Data size: 103739 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: smallint), _col5 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3379 Data size: 103739 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Statistics: Num rows: 3379 Data size: 103739 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) Reducer 2 Reduce Operator Tree: Join Operator @@ -100,29 +123,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized - Reducer 5 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col4} - 1 {KEY.reducesinkkey0} - outputColumnNames: _col0, _col1, _col5, _col15 - Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 = _col15) (type: boolean) - Statistics: Num rows: 3379 Data size: 103739 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: smallint), _col5 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3379 Data size: 103739 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: smallint) - sort order: + - Map-reduce partition columns: _col0 (type: smallint) - Statistics: Num rows: 3379 Data size: 103739 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out index 3f069ab..1629d42 100644 --- ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out @@ -1937,8 +1937,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2071,6 +2071,29 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + auto parallelism: false Reducer 2 Needs Tagging: true Reduce Operator Tree: @@ -2107,29 +2130,6 @@ STAGE PLANS: TotalFiles: 1 GatherStats: false MultiFileSpray: false - Reducer 4 - Needs Tagging: false - Reduce Operator Tree: - Extract - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - auto parallelism: false Stage: Stage-0 Fetch Operator @@ -2250,8 +2250,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2384,6 +2384,29 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + auto parallelism: false Reducer 2 Needs Tagging: true Reduce Operator Tree: @@ -2420,29 +2443,6 @@ STAGE PLANS: TotalFiles: 1 GatherStats: false MultiFileSpray: false - Reducer 4 - Needs Tagging: false - Reduce Operator Tree: - Extract - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - tag: 1 - value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - auto parallelism: false Stage: Stage-0 Fetch Operator @@ -4130,9 +4130,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 1) Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1) - Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -4265,6 +4265,29 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] + Reducer 5 + Needs Tagging: false + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + outputColumnNames: _col0, _col1, _col2, _col5, _col7 + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) + auto parallelism: false Reducer 2 Needs Tagging: true Reduce Operator Tree: @@ -4316,29 +4339,6 @@ STAGE PLANS: TotalFiles: 1 GatherStats: false MultiFileSpray: false - Reducer 5 - Needs Tagging: false - Reduce Operator Tree: - Extract - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - outputColumnNames: _col0, _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE - tag: 0 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) - auto parallelism: false Stage: Stage-0 Fetch Operator @@ -5202,10 +5202,11 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 7 <- Map 1 (PARTITION-LEVEL SORT, 1) + Reducer 5 <- Reducer 7 (PARTITION-LEVEL SORT, 1) + Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 6 (PARTITION-LEVEL SORT, 1) Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 1) - Reducer 5 <- Reducer 2 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -5271,21 +5272,13 @@ STAGE PLANS: name: default.part_orc Truncated Path -> Alias: /part_orc [part_orc] - Reducer 2 + Reducer 7 Needs Tagging: false Reduce Operator Tree: Extract Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col5 (type: int) - sort order: ++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) - auto parallelism: false Select Operator expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 @@ -5298,22 +5291,7 @@ STAGE PLANS: tag: -1 value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) auto parallelism: false - Reducer 3 - Needs Tagging: false - Reduce Operator Tree: - Extract - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - PTF Operator - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col2 (type: string), _col2 (type: string), _col1 (type: string) - sort order: +++ - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _wcol0 (type: bigint), _col1 (type: string), _col2 (type: string), _col5 (type: int) - auto parallelism: false - Reducer 4 + Reducer 5 Needs Tagging: false Reduce Operator Tree: Extract @@ -5321,12 +5299,12 @@ STAGE PLANS: PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), _wcol1 (type: int), _wcol2 (type: int), _wcol3 (type: double), _wcol4 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 2 + GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE @@ -5336,21 +5314,51 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 - columns p_mfgr,p_name,p_size,s2,r,dr,cud,fv1 - columns.comments - columns.types string:string:int:int:int:int:double:int + columns p_mfgr,p_name,p_size,r,dr,s + columns.comments + columns.types string:string:int:int:int:double #### A masked pattern was here #### - name default.part_5 - serialization.ddl struct part_5 { string p_mfgr, string p_name, i32 p_size, i32 s2, i32 r, i32 dr, double cud, i32 fv1} + name default.part_4 + serialization.ddl struct part_4 { string p_mfgr, string p_name, i32 p_size, i32 r, i32 dr, double s} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.part_5 + name: default.part_4 TotalFiles: 1 GatherStats: true MultiFileSpray: false - Reducer 5 + Reducer 6 + Needs Tagging: false + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col5 (type: int) + sort order: ++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Extract + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: string), _col2 (type: string), _col1 (type: string) + sort order: +++ + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _wcol0 (type: bigint), _col1 (type: string), _col2 (type: string), _col5 (type: int) + auto parallelism: false + Reducer 4 Needs Tagging: false Reduce Operator Tree: Extract @@ -5358,12 +5366,12 @@ STAGE PLANS: PTF Operator Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), _wcol0 (type: int), _wcol1 (type: int), _wcol2 (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), _wcol1 (type: int), _wcol2 (type: int), _wcol3 (type: double), _wcol4 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - GlobalTableId: 1 + GlobalTableId: 2 #### A masked pattern was here #### NumFilesPerFileSink: 1 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE @@ -5373,17 +5381,17 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 - columns p_mfgr,p_name,p_size,r,dr,s - columns.comments - columns.types string:string:int:int:int:double + columns p_mfgr,p_name,p_size,s2,r,dr,cud,fv1 + columns.comments + columns.types string:string:int:int:int:int:double:int #### A masked pattern was here #### - name default.part_4 - serialization.ddl struct part_4 { string p_mfgr, string p_name, i32 p_size, i32 r, i32 dr, double s} + name default.part_5 + serialization.ddl struct part_5 { string p_mfgr, string p_name, i32 p_size, i32 s2, i32 r, i32 dr, double cud, i32 fv1} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.part_4 + name: default.part_5 TotalFiles: 1 GatherStats: true MultiFileSpray: false