diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index f3b01b2..4fadab0 100644
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2847,6 +2847,10 @@ private static void populateLlapDaemonVarsSet(Set<String> llapDaemonVarsSetLocal
         true,
         "Allows hive server 2 to send progress bar update information. This is currently available"
             + " only if the execution engine is tez."),
+
+    SPARK_USE_OP_STATS("hive.spark.use.op.stats", false,
+        "Whether to use operator stats for Hive on Spark. If this is false, Hive will only use "
+            + "source table stats to determine map-join and reducer parallelism (similar to MR)"),
     SPARK_EXEC_INPLACE_PROGRESS("hive.spark.exec.inplace.progress", true,
         "Updates spark job execution progress in-place in the terminal."),
     TEZ_CONTAINER_MAX_JAVA_HEAP_FRACTION("hive.tez.container.max.java.heap.fraction", 0.8f,
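Note on the new flag above: it is read like any other boolean ConfVar. A minimal sketch, assuming the patched HiveConf is on the classpath (the class name here is illustrative, not part of the patch):

    import org.apache.hadoop.hive.conf.HiveConf;

    public class SparkOpStatsFlagCheck {
      public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        // Per the definition above this defaults to false, meaning only source
        // (table-scan) stats drive reducer parallelism and map-join decisions.
        boolean useOpStats = conf.getBoolVar(HiveConf.ConfVars.SPARK_USE_OP_STATS);
        System.out.println("hive.spark.use.op.stats = " + useOpStats);
      }
    }

At the session level the same switch can be flipped with "set hive.spark.use.op.stats=true;".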
+ LOG.debug("Skipping sink " + sink + " for now as we haven't seen all its parents."); + return false; + } + } + if (context.getVisitedReduceSinks().contains(sink)) { // skip walking the children LOG.debug("Already processed reduce sink: " + sink.getName()); return true; } - context.getVisitedReduceSinks().add(sink); if (needSetParallelism(sink, context.getConf())) { @@ -96,18 +118,43 @@ public Object process(Node nd, Stack stack, return false; } } + long numberOfBytes = 0; - // we need to add up all the estimates from the siblings of this reduce sink - for (Operator sibling - : sink.getChildOperators().get(0).getParentOperators()) { - if (sibling.getStatistics() != null) { - numberOfBytes += sibling.getStatistics().getDataSize(); - if (LOG.isDebugEnabled()) { - LOG.debug("Sibling " + sibling + " has stats: " + sibling.getStatistics()); + if (useOpStats) { + // we need to add up all the estimates from the siblings of this reduce sink + for (Operator sibling + : sink.getChildOperators().get(0).getParentOperators()) { + if (sibling.getStatistics() != null) { + numberOfBytes += sibling.getStatistics().getDataSize(); + if (LOG.isDebugEnabled()) { + LOG.debug("Sibling " + sibling + " has stats: " + sibling.getStatistics()); + } + } else { + LOG.warn("No stats available from: " + sibling); } + } + } else { + if (parentSinks.isEmpty()) { + // This is the first sink in the path, meaning that we should use TS stats + // to infer parallelism + Set sources = + OperatorUtils.findOperatorsUpstream(sink, TableScanOperator.class); + for (TableScanOperator source : sources) { + numberOfBytes += source.getStatistics().getDataSize(); + } + LOG.debug("Gathered stats for sink " + sink + ". Total size is " + + numberOfBytes + " bytes."); + } else { - LOG.warn("No stats available from: " + sibling); + int numberOfReducers = 0; + for (ReduceSinkOperator parent : parentSinks) { + numberOfReducers = Math.max(numberOfReducers, parent.getConf().getNumReducers()); + } + desc.setNumReducers(numberOfReducers); + LOG.debug("Set parallelism for sink " + sink + " to " + numberOfReducers + + " based on its parents"); + return false; } } @@ -134,7 +181,7 @@ public Object process(Node nd, Stack stack, desc.setNumReducers(numReducers); } } else { - LOG.info("Number of reducers determined to be: " + desc.getNumReducers()); + LOG.info("Number of reducers for sink " + sink + " was already determined to be: " + desc.getNumReducers()); } return false; @@ -165,6 +212,9 @@ private boolean needSetParallelism(ReduceSinkOperator reduceSink, HiveConf hiveC } private void getSparkMemoryAndCores(OptimizeSparkProcContext context) throws SemanticException { + if (sparkMemoryAndCores != null) { + return; + } if (context.getConf().getBoolean(SPARK_DYNAMIC_ALLOCATION_ENABLED, false)) { // If dynamic allocation is enabled, numbers for memory and cores are meaningless. So, we don't // try to get it. diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java index 7b2b3c0..68f3ae4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java @@ -101,21 +101,21 @@ public ReduceWork createReduceWork(GenSparkProcContext context, Operator root reduceWork.setReducer(root); reduceWork.setNeedsTagging(GenMapRedUtils.needsTagging(reduceWork)); - // All parents should be reduce sinks. We pick the one we just walked - // to choose the number of reducers. 
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java
index 7b2b3c0..68f3ae4 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java
@@ -101,21 +101,21 @@ public ReduceWork createReduceWork(GenSparkProcContext context, Operator<?> root
     reduceWork.setReducer(root);
     reduceWork.setNeedsTagging(GenMapRedUtils.needsTagging(reduceWork));
 
-    // All parents should be reduce sinks. We pick the one we just walked
-    // to choose the number of reducers. In the join/union case they will
-    // all be -1. In sort/order case where it matters there will be only
-    // one parent.
-    Preconditions.checkArgument(context.parentOfRoot instanceof ReduceSinkOperator,
-      "AssertionError: expected context.parentOfRoot to be an instance of ReduceSinkOperator, but was "
-      + context.parentOfRoot.getClass().getName());
-    ReduceSinkOperator reduceSink = (ReduceSinkOperator) context.parentOfRoot;
-
-    reduceWork.setNumReduceTasks(reduceSink.getConf().getNumReducers());
+    // Pick the maximum # reducers across all parents as the # of reduce tasks.
+    int maxExecutors = -1;
+    for (Operator<? extends OperatorDesc> parentOfRoot : root.getParentOperators()) {
+      Preconditions.checkArgument(parentOfRoot instanceof ReduceSinkOperator,
+          "AssertionError: expected parentOfRoot to be an "
+              + "instance of ReduceSinkOperator, but was "
+              + parentOfRoot.getClass().getName());
+      ReduceSinkOperator reduceSink = (ReduceSinkOperator) parentOfRoot;
+      maxExecutors = Math.max(maxExecutors, reduceSink.getConf().getNumReducers());
+    }
+    reduceWork.setNumReduceTasks(maxExecutors);
 
+    ReduceSinkOperator reduceSink = (ReduceSinkOperator) context.parentOfRoot;
     setupReduceSink(context, reduceWork, reduceSink);
-
     sparkWork.add(reduceWork);
-
     SparkEdgeProperty edgeProp = getEdgeProperty(reduceSink, reduceWork);
 
     sparkWork.connect(context.preceedingWork, reduceWork, edgeProp);
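The loop above drops the old single-parent assumption: with a union or join feeding the reduce work, each parent ReduceSinkOperator carries its own reducer estimate, and the reduce work must be as wide as its widest parent. A minimal illustration with stand-in values, not Hive's operator classes:

    import java.util.Arrays;
    import java.util.List;

    public class MaxParentParallelism {
      public static void main(String[] args) {
        // Estimates carried by three hypothetical parent reduce sinks.
        List<Integer> parentNumReducers = Arrays.asList(1, 3, 2);
        int maxExecutors = -1;
        for (int numReducers : parentNumReducers) {
          maxExecutors = Math.max(maxExecutors, numReducers);
        }
        System.out.println(maxExecutors); // 3: the widest parent wins
      }
    }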
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java
index 71528e8..64a9703 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java
@@ -47,7 +47,6 @@
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
 import org.apache.hadoop.hive.ql.hooks.WriteEntity;
 import org.apache.hadoop.hive.ql.lib.CompositeProcessor;
-import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
 import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
 import org.apache.hadoop.hive.ql.lib.Dispatcher;
 import org.apache.hadoop.hive.ql.lib.ForwardWalker;
@@ -55,6 +54,7 @@
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
 import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.lib.PreOrderWalker;
 import org.apache.hadoop.hive.ql.lib.Rule;
 import org.apache.hadoop.hive.ql.lib.RuleRegExp;
 import org.apache.hadoop.hive.ql.lib.TypeRule;
@@ -269,8 +269,8 @@ private void runJoinOptimizations(OptimizeSparkProcContext procCtx) throws Seman
     ParseContext pCtx = procCtx.getParseContext();
     Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
     opRules.put(new RuleRegExp("Set parallelism - ReduceSink",
-        ReduceSinkOperator.getOperatorName() + "%"),
-        new SetSparkReducerParallelism());
+            ReduceSinkOperator.getOperatorName() + "%"),
+        new SetSparkReducerParallelism(pCtx.getConf()));
 
     opRules.put(new TypeRule(JoinOperator.class), new SparkJoinOptimizer(pCtx));
 
@@ -283,7 +283,7 @@ private void runJoinOptimizations(OptimizeSparkProcContext procCtx) throws Seman
     // The dispatcher fires the processor corresponding to the closest matching
     // rule and passes the context along
     Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
-    GraphWalker ogw = new DefaultGraphWalker(disp);
+    GraphWalker ogw = new PreOrderWalker(disp);
 
     // Create a list of topop nodes
     ArrayList<Node> topNodes = new ArrayList<Node>();
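The walker swap above matters because SetSparkReducerParallelism can now return early and ask to be revisited: a pre-order walk dispatches a node before its children and reaches a multi-parent node once per incoming path, so a sink skipped on the first path gets another chance later. A schematic sketch of that visiting order, assuming nothing about Hive's actual walker internals:

    import java.util.Arrays;
    import java.util.List;

    public class PreOrderWalkSketch {
      static class Node {
        final String name;
        final List<Node> children;
        Node(String name, Node... children) {
          this.name = name;
          this.children = Arrays.asList(children);
        }
      }

      // Dispatch the current node first, then recurse; a node reachable from
      // two roots is dispatched once per path, mirroring "skip now, revisit
      // from another path" in the processor above.
      static void walk(Node node) {
        System.out.println("dispatch " + node.name);
        for (Node child : node.children) {
          walk(child);
        }
      }

      public static void main(String[] args) {
        Node rs2 = new Node("RS2");
        walk(new Node("TS1", new Node("RS1a", rs2)));
        walk(new Node("TS2", new Node("RS1b", rs2))); // RS2 dispatched again
      }
    }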
diff --git ql/src/test/results/clientpositive/spark/auto_join0.q.out ql/src/test/results/clientpositive/spark/auto_join0.q.out
index bc9c5db..6df79e5 100644
--- ql/src/test/results/clientpositive/spark/auto_join0.q.out
+++ ql/src/test/results/clientpositive/spark/auto_join0.q.out
@@ -66,7 +66,7 @@ STAGE PLANS:
     Spark
       Edges:
         Reducer 2 <- Map 1 (SORT, 1)
-        Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2)
+        Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1)
         Reducer 4 <- Reducer 3 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
diff --git ql/src/test/results/clientpositive/spark/auto_join23.q.out ql/src/test/results/clientpositive/spark/auto_join23.q.out
index 7256539..9634073 100644
--- ql/src/test/results/clientpositive/spark/auto_join23.q.out
+++ ql/src/test/results/clientpositive/spark/auto_join23.q.out
@@ -33,7 +33,7 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1
diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out
index a2fe528..88b491f 100644
--- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out
+++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out
@@ -134,7 +134,7 @@ POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket3out
 POSTHOOK: type: LOAD
 #### A masked pattern was here ####
 POSTHOOK: Output: default@bucket_medium@ds=2008-04-08
-Warning: Map Join MAPJOIN[27][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[28][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key
 PREHOOK: type: QUERY
 POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key
@@ -569,7 +569,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-Warning: Map Join MAPJOIN[27][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[28][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key
 PREHOOK: type: QUERY
 PREHOOK: Input: default@bucket_big
diff --git ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out
index 08d115d..72c7afd 100644
--- ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out
+++ ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out
@@ -1668,29 +1668,29 @@ STAGE PLANS:
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 4
+        Map 3
             Map Operator Tree:
                 TableScan
-                  alias: b
+                  alias: c
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: value is not null (type: boolean)
+                    predicate: key is not null (type: boolean)
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: key (type: int), value (type: string)
-                      outputColumnNames: _col0, _col1
+                      expressions: key (type: int)
+                      outputColumnNames: _col0
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                       Spark HashTable Sink Operator
                         keys:
-                          0 _col1 (type: string)
-                          1 _col1 (type: string)
+                          0 _col0 (type: int)
+                          1 _col0 (type: int)
             Local Work:
               Map Reduce Local Work
 
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1
@@ -1705,62 +1705,63 @@ STAGE PLANS:
                       expressions: key (type: int), value (type: string)
                       outputColumnNames: _col0, _col1
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col1 (type: string)
-        Map 3
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 _col0 (type: int)
+                          1 _col0 (type: int)
+                        outputColumnNames: _col0, _col1
+                        input vertices:
+                          1 Map 3
+                        Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col1 (type: string)
+                          sort order: +
+                          Map-reduce partition columns: _col1 (type: string)
+                          Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: int)
+            Local Work:
+              Map Reduce Local Work
+        Map 4
             Map Operator Tree:
                 TableScan
-                  alias: c
+                  alias: b
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: key is not null (type: boolean)
+                    predicate: value is not null (type: boolean)
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
+                      expressions: key (type: int), value (type: string)
+                      outputColumnNames: _col0, _col1
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
-                        key expressions: _col0 (type: int)
+                        key expressions: _col1 (type: string)
                         sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
+                        Map-reduce partition columns: _col1 (type: string)
                         Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: int)
         Reducer 2
-            Local Work:
-              Map Reduce Local Work
             Reduce Operator Tree:
               Join Operator
                 condition map:
                      Inner Join 0 to 1
                 keys:
-                  0 _col0 (type: int)
-                  1 _col0 (type: int)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-                Map Join Operator
-                  condition map:
-                       Inner Join 0 to 1
-                  keys:
-                    0 _col1 (type: string)
-                    1 _col1 (type: string)
-                  outputColumnNames: _col0, _col3
-                  input vertices:
-                    1 Map 4
+                  0 _col1 (type: string)
+                  1 _col1 (type: string)
+                outputColumnNames: _col0, _col3
+                Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int), _col3 (type: int)
+                  outputColumnNames: _col0, _col1
                   Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: _col0 (type: int), _col3 (type: int)
-                    outputColumnNames: _col0, _col1
+                  File Output Operator
+                    compressed: false
                     Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator
diff --git ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out
index 9d6b523..e1d8d79 100644
--- ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out
+++ ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out
@@ -122,29 +122,29 @@ STAGE PLANS:
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 4
+        Map 3
            Map Operator Tree:
                 TableScan
-                  alias: b
+                  alias: c
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: value is not null (type: boolean)
+                    predicate: key is not null (type: boolean)
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: key (type: int), value (type: string)
-                      outputColumnNames: _col0, _col1
+                      expressions: key (type: int)
+                      outputColumnNames: _col0
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                       Spark HashTable Sink Operator
                         keys:
-                          0 _col1 (type: string)
-                          1 _col1 (type: string)
+                          0 _col0 (type: int)
+                          1 _col0 (type: int)
             Local Work:
               Map Reduce Local Work
 
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1
@@ -159,62 +159,63 @@ STAGE PLANS:
                       expressions: key (type: int), value (type: string)
                       outputColumnNames: _col0, _col1
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col1 (type: string)
-        Map 3
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 _col0 (type: int)
+                          1 _col0 (type: int)
+                        outputColumnNames: _col0, _col1
+                        input vertices:
+                          1 Map 3
+                        Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col1 (type: string)
+                          sort order: +
+                          Map-reduce partition columns: _col1 (type: string)
+                          Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: int)
+            Local Work:
+              Map Reduce Local Work
+        Map 4
             Map Operator Tree:
                 TableScan
-                  alias: c
+                  alias: b
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: key is not null (type: boolean)
+                    predicate: value is not null (type: boolean)
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
+                      expressions: key (type: int), value (type: string)
+                      outputColumnNames: _col0, _col1
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
-                        key expressions: _col0 (type: int)
+                        key expressions: _col1 (type: string)
                         sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
+                        Map-reduce partition columns: _col1 (type: string)
                         Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: int)
         Reducer 2
-            Local Work:
-              Map Reduce Local Work
             Reduce Operator Tree:
              Join Operator
                condition map:
                      Inner Join 0 to 1
                keys:
-                  0 _col0 (type: int)
-                  1 _col0 (type: int)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-                Map Join Operator
-                  condition map:
-                       Inner Join 0 to 1
-                  keys:
-                    0 _col1 (type: string)
-                    1 _col1 (type: string)
-                  outputColumnNames: _col0, _col3
-                  input vertices:
-                    1 Map 4
+                  0 _col1 (type: string)
+                  1 _col1 (type: string)
+                outputColumnNames: _col0, _col3
+                Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int), _col3 (type: int)
+                  outputColumnNames: _col0, _col1
                   Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: _col0 (type: int), _col3 (type: int)
-                    outputColumnNames: _col0, _col1
+                  File Output Operator
+                    compressed: false
                     Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator
diff --git ql/src/test/results/clientpositive/spark/cross_product_check_1.q.out ql/src/test/results/clientpositive/spark/cross_product_check_1.q.out
index e21b7e6..9cfc21f 100644
--- ql/src/test/results/clientpositive/spark/cross_product_check_1.q.out
+++ ql/src/test/results/clientpositive/spark/cross_product_check_1.q.out
@@ -340,7 +340,7 @@ STAGE PLANS:
       Edges:
         Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1)
         Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1)
-        Reducer 5 <- Reducer 4 (GROUP, 2)
+        Reducer 5 <- Reducer 4 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1
diff --git ql/src/test/results/clientpositive/spark/cross_product_check_2.q.out ql/src/test/results/clientpositive/spark/cross_product_check_2.q.out
index d66a5ba..4c3e572 100644
--- ql/src/test/results/clientpositive/spark/cross_product_check_2.q.out
+++ ql/src/test/results/clientpositive/spark/cross_product_check_2.q.out
@@ -98,7 +98,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-Warning: Map Join MAPJOIN[18][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[19][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: explain select * from B d1 join B d2 on d1.key = d2.key join A
 PREHOOK: type: QUERY
 POSTHOOK: query: explain select * from B d1 join B d2 on d1.key = d2.key join A
@@ -373,7 +373,7 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 3 <- Map 2 (GROUP, 2)
+        Reducer 3 <- Map 2 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 2
diff --git ql/src/test/results/clientpositive/spark/infer_bucket_sort_reducers_power_two.q.out ql/src/test/results/clientpositive/spark/infer_bucket_sort_reducers_power_two.q.out
index bf010e7..5c344f6 100644
--- ql/src/test/results/clientpositive/spark/infer_bucket_sort_reducers_power_two.q.out
+++ ql/src/test/results/clientpositive/spark/infer_bucket_sort_reducers_power_two.q.out
@@ -92,7 +92,7 @@ Table:              	test_table
 #### A masked pattern was here ####
 Partition Parameters:
 	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
-	numFiles            	9
+	numFiles            	5
 	numRows             	1028
 	rawDataSize         	10968
 	totalSize           	11996
@@ -143,7 +143,7 @@ Table:              	test_table
 #### A masked pattern was here ####
 Partition Parameters:
 	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
-	numFiles            	9
+	numFiles            	5
 	numRows             	1028
 	rawDataSize         	10968
 	totalSize           	11996
@@ -194,7 +194,7 @@ Table:              	test_table
 #### A masked pattern was here ####
 Partition Parameters:
 	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
-	numFiles            	13
+	numFiles            	5
 	numRows             	2654
 	rawDataSize         	28466
 	totalSize           	31120
@@ -245,7 +245,7 @@ Table:              	test_table
 #### A masked pattern was here ####
 Partition Parameters:
 	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
-	numFiles            	9
+	numFiles            	5
 	numRows             	2654
 	rawDataSize         	28466
 	totalSize           	31120
@@ -296,7 +296,7 @@ Table:              	test_table
 #### A masked pattern was here ####
 Partition Parameters:
 	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
-	numFiles            	3
+	numFiles            	5
 	numRows             	5
 	rawDataSize         	19
 	totalSize           	24
diff --git ql/src/test/results/clientpositive/spark/join0.q.out ql/src/test/results/clientpositive/spark/join0.q.out
index 0db05a3..189e532 100644
--- ql/src/test/results/clientpositive/spark/join0.q.out
+++ ql/src/test/results/clientpositive/spark/join0.q.out
@@ -24,7 +24,7 @@ STAGE PLANS:
     Spark
       Edges:
         Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1)
-        Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2)
+        Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1
diff --git ql/src/test/results/clientpositive/spark/join23.q.out ql/src/test/results/clientpositive/spark/join23.q.out
index 40039db..316a54b 100644
--- ql/src/test/results/clientpositive/spark/join23.q.out
+++ ql/src/test/results/clientpositive/spark/join23.q.out
@@ -14,7 +14,7 @@ STAGE PLANS:
     Spark
       Edges:
         Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1)
-        Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2)
+        Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1
diff --git ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out
index 6ed0c68..14d1d4c 100644
--- ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out
+++ ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out
@@ -549,21 +549,21 @@ STAGE PLANS:
     Spark
 #### A masked pattern was here ####
       Vertices:
-        Map 5
+        Map 4
            Map Operator Tree:
                TableScan
-                  alias: src1
+                  alias: src
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: key is not null (type: boolean)
+                    predicate: value is not null (type: boolean)
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: key (type: string)
+                      expressions: value (type: string)
                       outputColumnNames: _col0
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                       Spark HashTable Sink Operator
                         keys:
-                          0 _col0 (type: string)
+                          0 _col1 (type: string)
                           1 _col0 (type: string)
             Local Work:
               Map Reduce Local Work
@@ -571,7 +571,7 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2)
         Reducer 3 <- Reducer 2 (GROUP, 2)
 #### A masked pattern was here ####
       Vertices:
@@ -587,22 +587,34 @@ STAGE PLANS:
                       expressions: key (type: string), value (type: string), ds (type: string)
                       outputColumnNames: _col0, _col1, _col2
                       Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col1 (type: string)
-                        sort order: +
-                        Map-reduce partition columns: _col1 (type: string)
-                        Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col0 (type: string), _col2 (type: string)
-        Map 4
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 _col1 (type: string)
+                          1 _col0 (type: string)
+                        outputColumnNames: _col0, _col2
+                        input vertices:
+                          1 Map 4
+                        Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: string)
+                          Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col2 (type: string)
+            Local Work:
+              Map Reduce Local Work
+        Map 5
            Map Operator Tree:
                TableScan
-                  alias: src
+                  alias: src1
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: value is not null (type: boolean)
+                    predicate: key is not null (type: boolean)
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: value (type: string)
+                      expressions: key (type: string)
                       outputColumnNames: _col0
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
@@ -611,39 +623,27 @@ STAGE PLANS:
                         Map-reduce partition columns: _col0 (type: string)
                         Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
         Reducer 2
-            Local Work:
-              Map Reduce Local Work
             Reduce Operator Tree:
               Join Operator
                 condition map:
                      Inner Join 0 to 1
                 keys:
-                  0 _col1 (type: string)
+                  0 _col0 (type: string)
                   1 _col0 (type: string)
-                outputColumnNames: _col0, _col2
-                Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
-                Map Join Operator
-                  condition map:
-                       Inner Join 0 to 1
-                  keys:
-                    0 _col0 (type: string)
-                    1 _col0 (type: string)
-                  outputColumnNames: _col2
-                  input vertices:
-                    1 Map 5
+                outputColumnNames: _col2
+                Statistics: Num rows: 2420 Data size: 25709 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count()
+                  keys: _col2 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
                   Statistics: Num rows: 2420 Data size: 25709 Basic stats: COMPLETE Column stats: NONE
-                  Group By Operator
-                    aggregations: count()
-                    keys: _col2 (type: string)
-                    mode: hash
-                    outputColumnNames: _col0, _col1
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
                     Statistics: Num rows: 2420 Data size: 25709 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: _col0 (type: string)
-                      sort order: +
-                      Map-reduce partition columns: _col0 (type: string)
-                      Statistics: Num rows: 2420 Data size: 25709 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col1 (type: bigint)
+                    value expressions: _col1 (type: bigint)
         Reducer 3
             Reduce Operator Tree:
              Group By Operator
diff --git ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out
index 17b7406..06eca3b 100644
--- ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out
+++ ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out
@@ -48,7 +48,7 @@ STAGE PLANS:
     Spark
       Edges:
         Reducer 2 <- Map 1 (SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2)
+        Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1
@@ -170,7 +170,7 @@ STAGE PLANS:
     Spark
       Edges:
         Reducer 2 <- Map 1 (SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2)
+        Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1
@@ -1748,7 +1748,7 @@ STAGE PLANS:
     Spark
       Edges:
         Reducer 2 <- Map 1 (SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2)
+        Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1
diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_25.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_25.q.out
index c3cdff3..1535359 100644
--- ql/src/test/results/clientpositive/spark/smb_mapjoin_25.q.out
+++ ql/src/test/results/clientpositive/spark/smb_mapjoin_25.q.out
@@ -171,9 +171,9 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-Warning: Map Join MAPJOIN[29][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[30][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
 Warning: Map Join MAPJOIN[31][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
-Warning: Map Join MAPJOIN[30][bigTable=?] in task 'Stage-2:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[29][bigTable=?] in task 'Stage-2:MAPRED' is a cross product
 PREHOOK: query: explain select * from (select a.key from smb_bucket_1 a join smb_bucket_2 b on (a.key = b.key) where a.key = 5) t1 left outer join (select c.key from smb_bucket_2 c join smb_bucket_3 d on (c.key = d.key) where c.key=5) t2 on (t1.key=t2.key) where t2.key=5
 PREHOOK: type: QUERY
@@ -306,9 +306,9 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-Warning: Map Join MAPJOIN[29][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[30][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
 Warning: Map Join MAPJOIN[31][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
-Warning: Map Join MAPJOIN[30][bigTable=?] in task 'Stage-2:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[29][bigTable=?] in task 'Stage-2:MAPRED' is a cross product
 PREHOOK: query: select * from (select a.key from smb_bucket_1 a join smb_bucket_2 b on (a.key = b.key) where a.key = 5) t1 left outer join (select c.key from smb_bucket_2 c join smb_bucket_3 d on (c.key = d.key) where c.key=5) t2 on (t1.key=t2.key) where t2.key=5
 PREHOOK: type: QUERY
 PREHOOK: Input: default@smb_bucket_1
diff --git ql/src/test/results/clientpositive/spark/subquery_in.q.out ql/src/test/results/clientpositive/spark/subquery_in.q.out
index 0fde046..7a2c481 100644
--- ql/src/test/results/clientpositive/spark/subquery_in.q.out
+++ ql/src/test/results/clientpositive/spark/subquery_in.q.out
@@ -1089,7 +1089,7 @@ STAGE PLANS:
     Spark
       Edges:
         Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 1), Map 13 (PARTITION-LEVEL SORT, 1)
-        Reducer 12 <- Reducer 11 (GROUP, 2)
+        Reducer 12 <- Reducer 11 (GROUP, 1)
         Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2)
         Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 9 (PARTITION-LEVEL SORT, 2)
         Reducer 4 <- Reducer 3 (GROUP, 1)
diff --git ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out
index 3807511..d4ad7eb 100644
--- ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out
+++ ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out
@@ -460,7 +460,7 @@ POSTHOOK: Input: default@src_5
 199	val_199
 199	val_199
 2	val_2
-Warning: Map Join MAPJOIN[47][bigTable=b] in task 'Stage-2:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[46][bigTable=b] in task 'Stage-2:MAPRED' is a cross product
 PREHOOK: query: explain
 from src b
 INSERT OVERWRITE TABLE src_4
@@ -695,7 +695,7 @@ STAGE PLANS:
   Stage: Stage-4
     Stats-Aggr Operator
 
-Warning: Map Join MAPJOIN[47][bigTable=b] in task 'Stage-2:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[46][bigTable=b] in task 'Stage-2:MAPRED' is a cross product
 PREHOOK: query: from src b
 INSERT OVERWRITE TABLE src_4
 select *
diff --git ql/src/test/results/clientpositive/spark/union11.q.out ql/src/test/results/clientpositive/spark/union11.q.out
index ff8cca1..bca91b2 100644
--- ql/src/test/results/clientpositive/spark/union11.q.out
+++ ql/src/test/results/clientpositive/spark/union11.q.out
@@ -21,7 +21,7 @@ STAGE PLANS:
     Spark
       Edges:
         Reducer 2 <- Map 1 (GROUP, 1)
-        Reducer 3 <- Reducer 2 (GROUP, 2), Reducer 5 (GROUP, 2), Reducer 7 (GROUP, 2)
+        Reducer 3 <- Reducer 2 (GROUP, 1), Reducer 5 (GROUP, 1), Reducer 7 (GROUP, 1)
         Reducer 5 <- Map 4 (GROUP, 1)
         Reducer 7 <- Map 6 (GROUP, 1)
 #### A masked pattern was here ####
@@ -187,6 +187,6 @@ POSTHOOK: query: select unionsrc.key, count(1) FROM (select 'tst1' as key, count
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 #### A masked pattern was here ####
-tst3	1
 tst1	1
 tst2	1
+tst3	1
diff --git ql/src/test/results/clientpositive/spark/union14.q.out ql/src/test/results/clientpositive/spark/union14.q.out
index 0b99f68..7957539 100644
--- ql/src/test/results/clientpositive/spark/union14.q.out
+++ ql/src/test/results/clientpositive/spark/union14.q.out
@@ -18,7 +18,7 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 2 <- Map 1 (GROUP, 2), Reducer 4 (GROUP, 2)
+        Reducer 2 <- Map 1 (GROUP, 1), Reducer 4 (GROUP, 1)
         Reducer 4 <- Map 3 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
@@ -122,20 +122,20 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 POSTHOOK: Input: default@src1
 #### A masked pattern was here ####
-273	1
-224	1
-255	1
 	10
-369	1
-213	1
-tst1	1
-66	1
-406	1
+128	1
+146	1
 150	1
+213	1
+224	1
 238	1
+255	1
+273	1
+278	1
+311	1
+369	1
 401	1
+406	1
+66	1
 98	1
-146	1
-311	1
-128	1
-278	1
+tst1	1
diff --git ql/src/test/results/clientpositive/spark/union15.q.out ql/src/test/results/clientpositive/spark/union15.q.out
index 339bcbb..f54044f 100644
--- ql/src/test/results/clientpositive/spark/union15.q.out
+++ ql/src/test/results/clientpositive/spark/union15.q.out
@@ -21,7 +21,7 @@ STAGE PLANS:
     Spark
       Edges:
         Reducer 2 <- Map 1 (GROUP, 1)
-        Reducer 3 <- Map 4 (GROUP, 2), Map 5 (GROUP, 2), Reducer 2 (GROUP, 2)
+        Reducer 3 <- Map 4 (GROUP, 1), Map 5 (GROUP, 1), Reducer 2 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1
@@ -147,20 +147,20 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 POSTHOOK: Input: default@src1
 #### A masked pattern was here ####
-273	2
-224	2
-255	2
 	20
-369	2
-213	2
-tst1	1
-66	2
-406	2
+128	2
+146	2
 150	2
+213	2
+224	2
 238	2
+255	2
+273	2
+278	2
+311	2
+369	2
 401	2
+406	2
+66	2
 98	2
-146	2
-311	2
-128	2
-278	2
+tst1	1
diff --git ql/src/test/results/clientpositive/spark/union17.q.out ql/src/test/results/clientpositive/spark/union17.q.out
index 9063e64..c280d07 100644
--- ql/src/test/results/clientpositive/spark/union17.q.out
+++ ql/src/test/results/clientpositive/spark/union17.q.out
@@ -39,8 +39,8 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 3 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2), Reducer 8 (GROUP PARTITION-LEVEL SORT, 2)
-        Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 2), Reducer 9 (GROUP PARTITION-LEVEL SORT, 2)
+        Reducer 3 <- Map 6 (GROUP PARTITION-LEVEL SORT, 1), Reducer 8 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 1), Reducer 9 (GROUP PARTITION-LEVEL SORT, 1)
         Reducer 8 <- Map 1 (GROUP, 1)
         Reducer 9 <- Map 1 (GROUP, 1)
 #### A masked pattern was here ####
diff --git ql/src/test/results/clientpositive/spark/union19.q.out ql/src/test/results/clientpositive/spark/union19.q.out
index f7168fe..fbecce1 100644
--- ql/src/test/results/clientpositive/spark/union19.q.out
+++ ql/src/test/results/clientpositive/spark/union19.q.out
@@ -40,7 +40,7 @@ STAGE PLANS:
     Spark
      Edges:
         Reducer 2 <- Map 1 (GROUP, 1)
-        Reducer 3 <- Map 4 (GROUP, 2), Reducer 2 (GROUP, 2)
+        Reducer 3 <- Map 4 (GROUP, 1), Reducer 2 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1
diff --git ql/src/test/results/clientpositive/spark/union20.q.out ql/src/test/results/clientpositive/spark/union20.q.out
index 2141861..356f74d 100644
--- ql/src/test/results/clientpositive/spark/union20.q.out
+++ ql/src/test/results/clientpositive/spark/union20.q.out
@@ -29,7 +29,7 @@ STAGE PLANS:
     Spark
       Edges:
         Reducer 2 <- Map 1 (GROUP, 1)
-        Reducer 3 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2)
+        Reducer 3 <- Map 4 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1)
         Reducer 6 <- Map 5 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
diff --git ql/src/test/results/clientpositive/spark/union3.q.out ql/src/test/results/clientpositive/spark/union3.q.out
index 1bf9571..de6761d 100644
--- ql/src/test/results/clientpositive/spark/union3.q.out
+++ ql/src/test/results/clientpositive/spark/union3.q.out
@@ -41,7 +41,7 @@ STAGE PLANS:
     Spark
       Edges:
         Reducer 2 <- Map 1 (GROUP, 1)
-        Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2), Reducer 9 (PARTITION-LEVEL SORT, 2)
+        Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1), Reducer 7 (PARTITION-LEVEL SORT, 1), Reducer 9 (PARTITION-LEVEL SORT, 1)
         Reducer 5 <- Map 1 (GROUP, 1)
         Reducer 7 <- Map 1 (GROUP, 1)
         Reducer 9 <- Map 1 (GROUP, 1)
diff --git ql/src/test/results/clientpositive/spark/union5.q.out ql/src/test/results/clientpositive/spark/union5.q.out
index 111caa2..7d2fd00 100644
--- ql/src/test/results/clientpositive/spark/union5.q.out
+++ ql/src/test/results/clientpositive/spark/union5.q.out
@@ -17,7 +17,7 @@ STAGE PLANS:
     Spark
       Edges:
         Reducer 2 <- Map 1 (GROUP, 1)
-        Reducer 3 <- Reducer 2 (GROUP, 2), Reducer 5 (GROUP, 2)
+        Reducer 3 <- Reducer 2 (GROUP, 1), Reducer 5 (GROUP, 1)
         Reducer 5 <- Map 4 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
diff --git ql/src/test/results/clientpositive/spark/union7.q.out ql/src/test/results/clientpositive/spark/union7.q.out
index 6cc5d6d..0508b8a 100644
--- ql/src/test/results/clientpositive/spark/union7.q.out
+++ ql/src/test/results/clientpositive/spark/union7.q.out
@@ -17,7 +17,7 @@ STAGE PLANS:
     Spark
       Edges:
         Reducer 2 <- Map 1 (GROUP, 1)
-        Reducer 3 <- Map 4 (GROUP, 2), Reducer 2 (GROUP, 2)
+        Reducer 3 <- Map 4 (GROUP, 1), Reducer 2 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1
@@ -118,20 +118,20 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 POSTHOOK: Input: default@src1
 #### A masked pattern was here ####
-273	1
-224	1
-255	1
 	10
-369	1
-213	1
-tst1	1
-66	1
-406	1
+128	1
+146	1
 150	1
+213	1
+224	1
 238	1
+255	1
+273	1
+278	1
+311	1
+369	1
 401	1
+406	1
+66	1
 98	1
-146	1
-311	1
-128	1
-278	1
+tst1	1