diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
index c2e643d..a6d5c62 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
@@ -113,7 +113,7 @@ public void initialize(HiveConf hiveConf) {
     // If optimize hive.optimize.bucketmapjoin.sortedmerge is set, add both
     // BucketMapJoinOptimizer and SortedMergeBucketMapJoinOptimizer
     if ((HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTSORTMERGEBUCKETMAPJOIN))
-        && !isTezExecEngine) {
+        && !isTezExecEngine && !isSparkExecEngine) {
       if (!bucketMapJoinOptimizer) {
         // No need to add BucketMapJoinOptimizer twice
         transformations.add(new BucketMapJoinOptimizer());
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkJoinOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkJoinOptimizer.java
new file mode 100644
index 0000000..51bdaf4
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkJoinOptimizer.java
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.spark;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.parse.spark.OptimizeSparkProcContext;
+
+import java.util.Stack;
+
+/**
+ * Converts a join to a more optimized join for the Spark path. Delegates to a more specialized join processor.
+ */
+public class SparkJoinOptimizer implements NodeProcessor {
+
+  private SparkSortMergeJoinOptimizer smbJoinOptimizer;
+  private SparkMapJoinOptimizer mapJoinOptimizer;
+
+  public SparkJoinOptimizer(ParseContext procCtx) {
+    smbJoinOptimizer = new SparkSortMergeJoinOptimizer(procCtx);
+    mapJoinOptimizer = new SparkMapJoinOptimizer();
+  }
+
+  @Override
+  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
+    OptimizeSparkProcContext context = (OptimizeSparkProcContext) procCtx;
+    HiveConf conf = context.getConf();
+
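+    // If conversion of SMB joins to map joins is enabled, prefer a map join and
+    // fall back to an SMB join; otherwise try the SMB conversion first.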
+    if (conf.getBoolVar(HiveConf.ConfVars.HIVE_AUTO_SORTMERGE_JOIN_TOMAPJOIN)) {
+      Object mapJoinOp = mapJoinOptimizer.process(nd, stack, procCtx, nodeOutputs);
+      if (mapJoinOp == null) {
+        smbJoinOptimizer.process(nd, stack, procCtx, nodeOutputs);
+      }
+    } else {
+      Object sortMergeJoinOp = smbJoinOptimizer.process(nd, stack, procCtx, nodeOutputs);
+      if (sortMergeJoinOp == null) {
+        mapJoinOptimizer.process(nd, stack, procCtx, nodeOutputs);
+      }
+    }
+    return null;
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
index 680c6fd..80665ec 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
@@ -163,7 +163,9 @@
 
     context.getMjOpSizes().put(mapJoinOp, mapJoinInfo[1] + mapJoinInfo[2]);
 
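+    // Return the new operator (rather than null) so that SparkJoinOptimizer can
+    // tell whether the map-join conversion actually happened.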
-    return null;
+    return mapJoinOp;
   }
 
   // replaces the join operator with a new CommonJoinOperator, removes the
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkReduceSinkMapJoinProc.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkReduceSinkMapJoinProc.java
index 83625ef..113832f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkReduceSinkMapJoinProc.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkReduceSinkMapJoinProc.java
@@ -18,13 +18,7 @@
 
 package org.apache.hadoop.hive.ql.optimizer.spark;
 
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Stack;
-
+import com.google.common.base.Preconditions;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.conf.HiveConf;
@@ -53,13 +47,17 @@ import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.PlanUtils;
-import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
 import org.apache.hadoop.hive.ql.plan.SparkEdgeProperty;
 import org.apache.hadoop.hive.ql.plan.SparkHashTableSinkDesc;
 import org.apache.hadoop.hive.ql.plan.SparkWork;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
 
-import com.google.common.base.Preconditions;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Stack;
 
 public class SparkReduceSinkMapJoinProc implements NodeProcessor {
@@ -185,19 +183,6 @@ public Object process(Node nd, Stack<Node> stack,
         SparkWork sparkWork = context.currentTask.getWork();
         LOG.debug("connecting "+parentWork.getName()+" with "+myWork.getName());
         sparkWork.connect(parentWork, myWork, edgeProp);
-
-        ReduceSinkOperator r;
-        if (parentRS.getConf().getOutputName() != null) {
-          LOG.debug("Cloning reduce sink for multi-child broadcast edge");
-          // we've already set this one up. Need to clone for the next work.
-          r = (ReduceSinkOperator) OperatorFactory.getAndMakeChild(
-              (ReduceSinkDesc) parentRS.getConf().clone(), parentRS.getParentOperators());
-          context.clonedReduceSinks.add(r);
-        } else {
-          r = parentRS;
-        }
-        // remember the output name of the reduce sink
-        r.getConf().setOutputName(myWork.getName());
       }
     }
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkSortMergeJoinOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkSortMergeJoinOptimizer.java
new file mode 100644
index 0000000..6a47513
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkSortMergeJoinOptimizer.java
@@ -0,0 +1,113 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.spark;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.JoinOperator;
+import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.optimizer.AbstractSMBJoinProc;
+import org.apache.hadoop.hive.ql.optimizer.SortBucketJoinProcCtx;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.parse.spark.OptimizeSparkProcContext;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+
+import java.util.Stack;
+
+/**
+ * Converts a join operator to an SMB join if eligible.
+ */
+public class SparkSortMergeJoinOptimizer extends AbstractSMBJoinProc implements NodeProcessor {
+
+  public SparkSortMergeJoinOptimizer(ParseContext pctx) {
+    super(pctx);
+  }
+
+  public SparkSortMergeJoinOptimizer() {
+  }
+
+  @Override
+  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+      Object... nodeOutputs) throws SemanticException {
+
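+    // Convert only when hive.auto.convert.sortmerge.join is enabled and the
+    // eligibility checks below pass.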
+    JoinOperator joinOp = (JoinOperator) nd;
+    HiveConf conf = ((OptimizeSparkProcContext) procCtx).getParseContext().getConf();
+
+    if (!conf.getBoolVar(HiveConf.ConfVars.HIVE_AUTO_SORTMERGE_JOIN)) {
+      return null;
+    }
+
+    SortBucketJoinProcCtx smbJoinContext = new SortBucketJoinProcCtx(conf);
+
+    boolean convert =
+        canConvertJoinToSMBJoin(
+            joinOp, smbJoinContext, pGraphContext, stack);
+
+    if (convert) {
+      return convertJoinToSMBJoinAndReturn(joinOp, smbJoinContext, pGraphContext);
+    }
+    return null;
+  }
+
+  protected boolean canConvertJoinToSMBJoin(JoinOperator joinOperator, SortBucketJoinProcCtx smbJoinContext,
+      ParseContext pGraphContext, Stack<Node> stack) throws SemanticException {
+    if (!supportBucketMapJoin(stack)) {
+      return false;
+    }
+    return canConvertJoinToSMBJoin(joinOperator, smbJoinContext, pGraphContext);
+  }
+
+  // Preliminary checks. In the MR version of the code these were done in a separate tree walk; here they are inlined.
+  private boolean supportBucketMapJoin(Stack<Node> stack) {
+    int size = stack.size();
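+    // The top of the walked path must be the join itself, fed directly by a ReduceSink.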
+    if (!(stack.get(size - 1) instanceof JoinOperator) ||
+        !(stack.get(size - 2) instanceof ReduceSinkOperator)) {
+      return false;
+    }
+
+    // If any operator in the stack does not support an auto-conversion, this join should
+    // not be converted.
+    for (int pos = size - 3; pos >= 0; pos--) {
+      Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) stack.get(pos);
+      if (!op.supportAutomaticSortMergeJoin()) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  protected SMBMapJoinOperator convertJoinToSMBJoinAndReturn(
+      JoinOperator joinOp,
+      SortBucketJoinProcCtx smbJoinContext,
+      ParseContext parseContext) throws SemanticException {
+    MapJoinOperator mapJoinOp = convertJoinToBucketMapJoin(joinOp, smbJoinContext, parseContext);
+    SMBMapJoinOperator smbMapJoinOp =
+        convertBucketMapJoinToSMBJoin(mapJoinOp, smbJoinContext, parseContext);
+    smbMapJoinOp.setConvertedAutomaticallySMBJoin(true);
+    return smbMapJoinOp;
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkWork.java ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkWork.java
index 5e432ac..ba855fc 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkWork.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkWork.java
@@ -100,24 +100,6 @@ public Object process(Node nd, Stack<Node> stack,
     }
     SparkWork sparkWork = context.currentTask.getWork();
-
-    if (GenSparkUtils.getChildOperator(root, DummyStoreOperator.class) != null) {
-      /*
-       * SMB join case:
-       *
-       *   (Big)   (Small)  (Small)
-       *     TS       TS       TS
-       *      \       |       /
-       *       \      DS     DS
-       *        \     |     /
-       *         SMBJoinOP
-       *
-       * Only create MapWork rooted at TS of big table.
-       * If there are dummy-store operators anywhere in TS's children path, then this is for the small tables.
-       * No separate Map-Task need to be created for small table TS, as they will be read by the MapWork of the big-table.
-       */
-      return null;
-    }
 
     SMBMapJoinOperator smbOp = GenSparkUtils.getChildOperator(root, SMBMapJoinOperator.class);
 
     // Right now the work graph is pretty simple. If there is no
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java
index b6a7ac2..835f523 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java
@@ -33,6 +33,7 @@
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.Context;
 import org.apache.hadoop.hive.ql.exec.ConditionalTask;
+import org.apache.hadoop.hive.ql.exec.DummyStoreOperator;
 import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
 import org.apache.hadoop.hive.ql.exec.JoinOperator;
 import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
@@ -46,9 +47,9 @@ import org.apache.hadoop.hive.ql.hooks.ReadEntity;
 import org.apache.hadoop.hive.ql.hooks.WriteEntity;
 import org.apache.hadoop.hive.ql.lib.CompositeProcessor;
+import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
 import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
 import org.apache.hadoop.hive.ql.lib.Dispatcher;
-import org.apache.hadoop.hive.ql.lib.ForwardWalker;
 import org.apache.hadoop.hive.ql.lib.GraphWalker;
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
@@ -65,7 +66,7 @@ import org.apache.hadoop.hive.ql.optimizer.physical.StageIDsRearranger;
 import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer;
 import org.apache.hadoop.hive.ql.optimizer.spark.SetSparkReducerParallelism;
-import org.apache.hadoop.hive.ql.optimizer.spark.SparkMapJoinOptimizer;
+import org.apache.hadoop.hive.ql.optimizer.spark.SparkJoinOptimizer;
 import org.apache.hadoop.hive.ql.optimizer.spark.SparkReduceSinkMapJoinProc;
 import org.apache.hadoop.hive.ql.optimizer.spark.SparkSkewJoinResolver;
 import org.apache.hadoop.hive.ql.optimizer.spark.SparkSortMergeJoinFactory;
@@ -106,31 +107,31 @@ public void init(HiveConf conf, LogHelper console, Hive db) {
   @Override
   protected void optimizeOperatorPlan(ParseContext pCtx, Set<ReadEntity> inputs,
       Set<WriteEntity> outputs) throws SemanticException {
-    // TODO: need to add spark specific optimization.
     // Sequence of TableScan operators to be walked
     Deque<Operator<?>> deque = new LinkedList<Operator<?>>();
     deque.addAll(pCtx.getTopOps().values());
 
-    // Create the context for the walker
-    OptimizeSparkProcContext procCtx
-        = new OptimizeSparkProcContext(conf, pCtx, inputs, outputs, deque);
+    OptimizeSparkProcContext procCtx = new OptimizeSparkProcContext(conf, pCtx, inputs, outputs, deque);
 
     // create a walker which walks the tree in a DFS manner while maintaining
     // the operator stack.
     Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
     opRules.put(new RuleRegExp("Set parallelism - ReduceSink",
-        ReduceSinkOperator.getOperatorName() + "%"),
-        new SetSparkReducerParallelism());
+            ReduceSinkOperator.getOperatorName() + "%"),
+        new SetSparkReducerParallelism());
 
     // TODO: need to research and verify support convert join to map join optimization.
     opRules.put(new RuleRegExp(new String("Convert Join to Map-join"),
-        JoinOperator.getOperatorName() + "%"), new SparkMapJoinOptimizer());
+        JoinOperator.getOperatorName() + "%"), new SparkJoinOptimizer(pCtx));
+
     // The dispatcher fires the processor corresponding to the closest matching
     // rule and passes the context along
     Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
-    List<Node> topNodes = new ArrayList<Node>();
+    GraphWalker ogw = new DefaultGraphWalker(disp);
+
+    // Create a list of topop nodes
+    ArrayList<Node> topNodes = new ArrayList<Node>();
     topNodes.addAll(pCtx.getTopOps().values());
-    GraphWalker ogw = new ForwardWalker(disp);
     ogw.startWalking(topNodes, null);
   }
@@ -155,8 +156,7 @@ protected void generateTaskTree(List<Task<? extends Serializable>> rootTasks, Pa
     opRules.put(new RuleRegExp("Split Work - ReduceSink",
         ReduceSinkOperator.getOperatorName() + "%"), genSparkWork);
 
-    opRules.put(new RuleRegExp("No more walking on ReduceSink-MapJoin",
-        MapJoinOperator.getOperatorName() + "%"), new SparkReduceSinkMapJoinProc());
+    opRules.put(new TypeRule(MapJoinOperator.class), new SparkReduceSinkMapJoinProc());
 
     opRules.put(new RuleRegExp("Split Work + Move/Merge - FileSink",
         FileSinkOperator.getOperatorName() + "%"),
@@ -181,6 +181,35 @@ public Object process(Node n, Stack<Node> s,
       }
     );
 
+    /**
+     * SMB join case:   (Big)   (Small)  (Small)
+     *                    TS       TS       TS
+     *                     \       |       /
+     *                      \      DS     DS
+     *                       \     |     /
+     *                        SMBJoinOP
+     *
+     * Some of the other processors expect only one traversal beyond SMBJoinOp.
+     * We need to traverse from the big-table path only, and stop traversing on
+     * the small-table path once we reach SMBJoinOp.
+     */
+    opRules.put(new TypeRule(SMBMapJoinOperator.class),
+      new NodeProcessor() {
+        @Override
+        public Object process(Node currNode, Stack<Node> stack,
+            NodeProcessorCtx procCtx, Object... os) throws SemanticException {
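+          // A DummyStoreOperator on the stack means this SMBJoinOp was reached
+          // along a small-table path; per the note above, such paths should not
+          // be walked further.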
+          for (Node stackNode : stack) {
+            if (stackNode instanceof DummyStoreOperator) {
+              return true;
+            }
+          }
+          return false;
+        }
+      }
+    );
+
     // The dispatcher fires the processor corresponding to the closest matching
     // rule and passes the context along
     Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
@@ -190,7 +216,6 @@ public Object process(Node n, Stack<Node> s,
 
     ogw.startWalking(topNodes, null);
 
-
     // ------------------- Second Pass -----------------------
     // SMB Join optimizations to add the "localWork" and bucketing data structures to MapWork.
opRules.clear(); diff --git ql/src/test/results/clientpositive/spark/auto_join32.q.out ql/src/test/results/clientpositive/spark/auto_join32.q.out index 28c022e..fd52307 100644 --- ql/src/test/results/clientpositive/spark/auto_join32.q.out +++ ql/src/test/results/clientpositive/spark/auto_join32.q.out @@ -193,15 +193,18 @@ STAGE PLANS: 0 name (type: string) 1 name (type: string) outputColumnNames: _col0, _col8 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator aggregations: count(DISTINCT _col8) keys: _col0 (type: string), _col8 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reducer 2 Reduce Operator Tree: Group By Operator @@ -209,11 +212,14 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -312,15 +318,18 @@ STAGE PLANS: 0 name (type: string) 1 name (type: string) outputColumnNames: _col0, _col8 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator aggregations: count(DISTINCT _col8) keys: _col0 (type: string), _col8 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reducer 2 Reduce Operator Tree: Group By Operator @@ -328,11 +337,14 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -445,11 +457,14 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/spark/auto_join_stats.q.out ql/src/test/results/clientpositive/spark/auto_join_stats.q.out index bccd246..50ec0e4 100644 --- ql/src/test/results/clientpositive/spark/auto_join_stats.q.out +++ ql/src/test/results/clientpositive/spark/auto_join_stats.q.out @@ -42,28 +42,28 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 4 Map Operator Tree: TableScan - alias: src2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: smalltable + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {key} - 1 + 0 {_col0} {_col5} + 1 {key} keys: - 0 key (type: string) - 1 key (type: string) + 0 (_col0 + _col5) (type: double) + 1 UDFToDouble(key) (type: double) Local Work: Map Reduce Local Work Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -74,65 +74,63 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 {key} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col5 - input vertices: - 1 Map 3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 + _col5) is not null (type: boolean) - Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: (_col0 + _col5) (type: double) - sort order: + - Map-reduce partition columns: (_col0 + _col5) (type: double) - Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col5 (type: string) - Local Work: - Map Reduce Local Work - Map 4 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 3 Map Operator Tree: TableScan - alias: smalltable - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: UDFToDouble(key) (type: double) + key expressions: key (type: string) sort order: + - Map-reduce partition columns: UDFToDouble(key) (type: double) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string) + Map-reduce partition columns: key (type: string) 
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reducer 2 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 condition expressions: - 0 {VALUE._col0} {VALUE._col5} - 1 {VALUE._col0} - outputColumnNames: _col0, _col5, _col10 - Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col5 (type: string), _col10 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + 0 {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col5 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 + _col5) is not null (type: boolean) + Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col5} + 1 {key} + keys: + 0 (_col0 + _col5) (type: double) + 1 UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col5, _col10 + input vertices: + 1 Map 4 Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: string), _col5 (type: string), _col10 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -182,8 +180,7 @@ POSTHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: @@ -191,50 +188,45 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 5 + Map 4 Map Operator Tree: TableScan - alias: smalltable2 - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + alias: smalltable + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {_col0} {_col5} {_col10} - 1 + 0 {_col0} {_col5} + 1 {key} keys: 0 (_col0 + _col5) (type: double) 1 UDFToDouble(key) (type: double) Local Work: Map Reduce Local Work - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 3 + Map 5 Map Operator Tree: TableScan - alias: src2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: smalltable2 + Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column 
stats: NONE Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Spark HashTable Sink Operator condition expressions: - 0 {key} + 0 {_col0} {_col5} {_col10} 1 keys: - 0 key (type: string) - 1 key (type: string) + 0 (_col0 + _col5) (type: double) + 1 UDFToDouble(key) (type: double) Local Work: Map Reduce Local Work Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: Map 1 @@ -245,44 +237,24 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 {key} - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col5 - input vertices: - 1 Map 3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 + _col5) is not null (type: boolean) - Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: (_col0 + _col5) (type: double) - sort order: + - Map-reduce partition columns: (_col0 + _col5) (type: double) - Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col5 (type: string) - Local Work: - Map Reduce Local Work - Map 4 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 3 Map Operator Tree: TableScan - alias: smalltable - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: UDFToDouble(key) (type: double) + key expressions: key (type: string) sort order: + - Map-reduce partition columns: UDFToDouble(key) (type: double) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - value expressions: key (type: string) + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reducer 2 Local Work: Map Reduce Local Work @@ -291,37 +263,53 @@ STAGE PLANS: condition map: Inner Join 0 to 1 condition expressions: - 0 {VALUE._col0} {VALUE._col5} - 1 {VALUE._col0} - outputColumnNames: _col0, _col5, _col10 - Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE + 0 {KEY.reducesinkkey0} + 1 {KEY.reducesinkkey0} + outputColumnNames: _col0, _col5 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col0 + _col5) is not null (type: boolean) - Statistics: Num rows: 76 Data size: 810 
Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: - 0 {_col0} {_col5} {_col10} - 1 + 0 {_col0} {_col5} + 1 {key} keys: 0 (_col0 + _col5) (type: double) 1 UDFToDouble(key) (type: double) outputColumnNames: _col0, _col5, _col10 input vertices: - 1 Map 5 - Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col5 (type: string), _col10 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + 1 Map 4 + Statistics: Num rows: 151 Data size: 1611 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 + _col5) is not null (type: boolean) + Statistics: Num rows: 76 Data size: 810 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col5} {_col10} + 1 + keys: + 0 (_col0 + _col5) (type: double) + 1 UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col5, _col10 + input vertices: + 1 Map 5 Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: _col0 (type: string), _col5 (type: string), _col10 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 891 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_1.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_1.q.out index 2e35c66..423f5fd 100644 --- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_1.q.out +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_1.q.out @@ -172,12 +172,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col0 (type: bigint) auto parallelism: false @@ -290,14 +293,17 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was 
here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -404,12 +410,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col0 (type: bigint) auto parallelism: false @@ -522,14 +531,17 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -606,10 +618,94 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work + Bucket Mapjoin Context: + Alias Bucket File Name Mapping: +#### A masked pattern was here #### + Alias Bucket Output File Name Mapping: +#### A masked pattern was here #### + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 114 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { 
string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [b] + Stage: Stage-1 Spark Edges: @@ -626,7 +722,7 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator + Map Join Operator condition map: Inner Join 0 to 1 condition expressions: @@ -635,16 +731,29 @@ STAGE PLANS: keys: 0 key (type: string) 1 key (type: string) + input vertices: + 1 Map 3 Position of Big Table: 0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + BucketMapJoin: true Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col0 (type: bigint) auto parallelism: false + Local Work: + Map Reduce Local Work + Bucket Mapjoin Context: + Alias Bucket File Name Mapping: +#### A masked pattern was here #### + Alias Bucket Output File Name Mapping: +#### A masked pattern was here #### Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -754,14 +863,17 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_13.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_13.q.out index b2e928f..5e061f3 100644 --- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_13.q.out +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_13.q.out @@ -102,14 +102,18 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -118,8 +122,10 @@ STAGE PLANS: Select Operator expressions: _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 38 Basic 
stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -286,14 +292,18 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -302,8 +312,10 @@ STAGE PLANS: Select Operator expressions: _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -441,13 +453,36 @@ INSERT OVERWRITE TABLE dest1 select key1, key2 INSERT OVERWRITE TABLE dest2 select value1, value2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage + Stage-5 is a root stage + Stage-2 depends on stages: Stage-5 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-5 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {key} {value} + 1 {value} + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + Stage: Stage-2 Spark #### A masked pattern was here #### @@ -460,7 +495,7 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator + Map Join Operator condition map: Inner Join 0 to 1 condition expressions: @@ -470,14 +505,20 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 2 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE 
File Output Operator compressed: false + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -486,13 +527,17 @@ STAGE PLANS: Select Operator expressions: _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Local Work: + Map Reduce Local Work Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_14.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_14.q.out index 20ee657..ae299a1 100644 --- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_14.q.out +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_14.q.out @@ -43,10 +43,30 @@ explain select count(*) FROM tbl1 a LEFT OUTER JOIN tbl2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + Stage: Stage-1 Spark Edges: @@ -58,7 +78,7 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator + Map Join Operator condition map: Left Outer Join0 to 1 condition expressions: @@ -67,24 +87,34 @@ STAGE PLANS: keys: 0 key (type: int) 1 key (type: int) + input vertices: + 1 Map 3 + Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -126,22 +156,42 @@ explain select count(*) FROM tbl1 a RIGHT OUTER JOIN tbl2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: + 
Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b Statistics: Num rows: 189 Data size: 1891 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator + Map Join Operator condition map: Right Outer Join0 to 1 condition expressions: @@ -150,24 +200,34 @@ STAGE PLANS: keys: 0 key (type: int) 1 key (type: int) + input vertices: + 0 Map 1 + Statistics: Num rows: 207 Data size: 2080 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 2 + Local Work: + Map Reduce Local Work + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_15.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_15.q.out index 0a48d00..cb9da13 100644 --- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_15.q.out +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_15.q.out @@ -41,10 +41,30 @@ POSTHOOK: query: explain select count(*) FROM tbl1 a LEFT OUTER JOIN tbl2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + Stage: Stage-1 Spark Edges: @@ -56,7 +76,7 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator + Map Join Operator condition map: Left Outer Join0 to 1 condition expressions: @@ -65,24 +85,34 @@ STAGE PLANS: keys: 0 key (type: int) 1 key (type: int) + input vertices: + 1 Map 3 + Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator 
sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -101,22 +131,42 @@ POSTHOOK: query: explain select count(*) FROM tbl1 a RIGHT OUTER JOIN tbl2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: b Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator + Map Join Operator condition map: Right Outer Join0 to 1 condition expressions: @@ -125,24 +175,34 @@ STAGE PLANS: keys: 0 key (type: int) 1 key (type: int) + input vertices: + 0 Map 1 + Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 2 + Local Work: + Map Reduce Local Work + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_2.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_2.q.out index 5008a3f..bc985fd 100644 --- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_2.q.out +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_2.q.out @@ -152,12 +152,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 0 + Statistics: Num rows: 29 Data size: 3025 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: 
count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col0 (type: bigint) auto parallelism: false @@ -270,14 +273,17 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -356,10 +362,94 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work + Bucket Mapjoin Context: + Alias Bucket File Name Mapping: +#### A masked pattern was here #### + Alias Bucket Output File Name Mapping: +#### A masked pattern was here #### + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 226 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated 
Path -> Alias: + /bucket_small/ds=2008-04-08 [b] + Stage: Stage-1 Spark Edges: @@ -376,7 +466,7 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator + Map Join Operator condition map: Inner Join 0 to 1 condition expressions: @@ -385,16 +475,29 @@ STAGE PLANS: keys: 0 key (type: string) 1 key (type: string) + input vertices: + 1 Map 3 Position of Big Table: 0 + Statistics: Num rows: 29 Data size: 3025 Basic stats: COMPLETE Column stats: NONE + BucketMapJoin: true Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col0 (type: bigint) auto parallelism: false + Local Work: + Map Reduce Local Work + Bucket Mapjoin Context: + Alias Bucket File Name Mapping: +#### A masked pattern was here #### + Alias Bucket Output File Name Mapping: +#### A masked pattern was here #### Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -504,14 +607,17 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_3.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_3.q.out index 3b081af..60a5a6f 100644 --- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_3.q.out +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_3.q.out @@ -152,12 +152,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 1 + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col0 (type: bigint) auto parallelism: false @@ -221,14 +224,17 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -335,12 +341,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 0 + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column 
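NOTE: the Partition metadata above (bucket_count 4, bucket_field_name key, SORTBUCKETCOLSPREFIX TRUE, partition column ds, text format) implies a bucketed and sorted source table. A DDL sketch consistent with that metadata; the actual setup script lives in the .q file, not in this diff:

    create table bucket_small (key string, value string)
    partitioned by (ds string)
    clustered by (key) sorted by (key) into 4 buckets
    stored as textfile;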
stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col0 (type: bigint) auto parallelism: false @@ -404,14 +413,17 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -488,10 +500,143 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work + Bucket Mapjoin Context: + Alias Bucket File Name Mapping: +#### A masked pattern was here #### + Alias Bucket Output File Name Mapping: +#### A masked pattern was here #### + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 114 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: 
default.bucket_small + name: default.bucket_small +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 114 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [b] + /bucket_small/ds=2008-04-09 [b] + Stage: Stage-1 Spark Edges: @@ -508,7 +653,7 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator + Map Join Operator condition map: Inner Join 0 to 1 condition expressions: @@ -517,16 +662,29 @@ STAGE PLANS: keys: 0 key (type: string) 1 key (type: string) + input vertices: + 1 Map 3 Position of Big Table: 0 + Statistics: Num rows: 31 Data size: 3196 Basic stats: COMPLETE Column stats: NONE + BucketMapJoin: true Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col0 (type: bigint) auto parallelism: false + Local Work: + Map Reduce Local Work + Bucket Mapjoin Context: + Alias Bucket File Name Mapping: +#### A masked pattern was here #### + Alias Bucket Output File Name Mapping: +#### A masked pattern was here #### Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -587,14 +745,17 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff 
--git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out index 2a11fb2..b376417 100644 --- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out @@ -168,12 +168,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 1 + Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col0 (type: bigint) auto parallelism: false @@ -237,14 +240,17 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -351,12 +357,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 0 + Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col0 (type: bigint) auto parallelism: false @@ -420,14 +429,17 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -504,10 +516,143 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 4 Data size: 452 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work + Bucket Mapjoin Context: + Alias Bucket File Name Mapping: +#### A masked pattern was here #### + Alias Bucket Output File 
Name Mapping: +#### A masked pattern was here #### + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 226 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 226 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [b] + /bucket_small/ds=2008-04-09 [b] + Stage: Stage-1 Spark Edges: @@ -524,7 +669,7 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE 
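NOTE: in the auto_sortmerge_join_4 plan around this hunk, both small-side partitions (ds=2008-04-08 and ds=2008-04-09) are scanned by the same Map 3 vertex and localized into a single hash table. The GatherStats and Path -> Partition detail indicates extended explain output; a sketch of the kind of query being checked, assuming the conventional bucket_big/bucket_small pair these tests use:

    explain extended
    select count(*) from bucket_big a join bucket_small b on a.key = b.key;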
Column stats: NONE - Sorted Merge Bucket Map Join Operator + Map Join Operator condition map: Inner Join 0 to 1 condition expressions: @@ -533,16 +678,29 @@ STAGE PLANS: keys: 0 key (type: string) 1 key (type: string) + input vertices: + 1 Map 3 Position of Big Table: 0 + Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE + BucketMapJoin: true Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col0 (type: bigint) auto parallelism: false + Local Work: + Map Reduce Local Work + Bucket Mapjoin Context: + Alias Bucket File Name Mapping: +#### A masked pattern was here #### + Alias Bucket Output File Name Mapping: +#### A masked pattern was here #### Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -603,14 +761,17 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_5.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_5.q.out index 0d971d2..d97a06f 100644 --- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_5.q.out +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_5.q.out @@ -133,12 +133,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 1 + Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col0 (type: bigint) auto parallelism: false @@ -198,14 +201,17 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -306,12 +312,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 0 + Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 8 
Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col0 (type: bigint) auto parallelism: false @@ -371,14 +380,17 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -449,10 +461,90 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work + Bucket Mapjoin Context: + Alias Bucket File Name Mapping: +#### A masked pattern was here #### + Alias Bucket Output File Name Mapping: +#### A masked pattern was here #### + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: bucket_small + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 4 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 226 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 4 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 226 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small [b] + Stage: Stage-1 Spark Edges: @@ -469,7 +561,7 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator + Map 
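NOTE: auto_sortmerge_join_5 uses an unpartitioned small table (base file name: bucket_small, no partition values), so Truncated Path -> Alias resolves to the table directory itself. A DDL sketch consistent with the properties shown (4 buckets, sorted by key, text format); the real setup is in the .q file:

    create table bucket_small (key string, value string)
    clustered by (key) sorted by (key) into 4 buckets
    stored as textfile;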
Join Operator condition map: Inner Join 0 to 1 condition expressions: @@ -478,16 +570,29 @@ STAGE PLANS: keys: 0 key (type: string) 1 key (type: string) + input vertices: + 1 Map 3 Position of Big Table: 0 + Statistics: Num rows: 15 Data size: 1567 Basic stats: COMPLETE Column stats: NONE + BucketMapJoin: true Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col0 (type: bigint) auto parallelism: false + Local Work: + Map Reduce Local Work + Bucket Mapjoin Context: + Alias Bucket File Name Mapping: +#### A masked pattern was here #### + Alias Bucket Output File Name Mapping: +#### A masked pattern was here #### Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -544,14 +649,17 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_6.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_6.q.out index 9d455dc..2aef1b3 100644 --- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_6.q.out +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_6.q.out @@ -118,10 +118,12 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: TableScan @@ -143,12 +145,15 @@ STAGE PLANS: condition expressions: 0 1 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Reducer 3 Reduce Operator Tree: @@ -156,11 +161,14 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -225,10 +233,12 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col1 + Statistics: Num rows: 275 Data 
size: 2921 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: TableScan @@ -250,12 +260,15 @@ STAGE PLANS: condition expressions: 0 1 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Reducer 3 Reduce Operator Tree: @@ -263,11 +276,14 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -332,10 +348,12 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: TableScan @@ -357,12 +375,15 @@ STAGE PLANS: condition expressions: 0 1 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Reducer 3 Reduce Operator Tree: @@ -370,11 +391,14 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -551,12 +575,15 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) 2 key (type: int) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: @@ -564,11 +591,14 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: 
Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -629,10 +659,12 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: TableScan @@ -654,12 +686,15 @@ STAGE PLANS: condition expressions: 0 1 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Reducer 3 Reduce Operator Tree: @@ -667,11 +702,14 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -732,10 +770,12 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: TableScan @@ -757,12 +797,15 @@ STAGE PLANS: condition expressions: 0 1 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Reducer 3 Reduce Operator Tree: @@ -770,11 +813,14 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -951,12 +997,15 @@ STAGE PLANS: 0 key 
(type: int) 1 key (type: int) 2 key (type: int) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: @@ -964,11 +1013,14 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1029,10 +1081,12 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Map 4 Map Operator Tree: TableScan @@ -1054,12 +1108,15 @@ STAGE PLANS: condition expressions: 0 1 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Reducer 3 Reduce Operator Tree: @@ -1067,11 +1124,14 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out index 61eb6ae..5ccb622 100644 --- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out @@ -185,12 +185,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 1 + Statistics: Num rows: 29 Data size: 3025 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col0 (type: bigint) auto parallelism: false @@ -303,14 +306,17 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial 
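NOTE: the auto_sortmerge_join_6 hunks above include a three-way join (keys 0/1/2, all int) that this patch leaves structurally unchanged — only Statistics lines are added. A sketch of the query shape, with tbl1/tbl2/tbl3 as assumed table names since the query text is outside these hunks:

    explain
    select count(*)
    from tbl1 a
    join tbl2 b on a.key = b.key
    join tbl3 c on a.key = c.key;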
outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -419,12 +425,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 0 + Statistics: Num rows: 29 Data size: 3025 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col0 (type: bigint) auto parallelism: false @@ -537,14 +546,17 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -623,10 +635,143 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 4 Data size: 452 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 2 Data size: 226 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work + Bucket Mapjoin Context: + Alias Bucket File Name Mapping: +#### A masked pattern was here #### + Alias Bucket Output File Name Mapping: +#### A masked pattern was here #### + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 226 +#### A masked pattern was here #### + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 226 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [b] + /bucket_small/ds=2008-04-09 [b] + Stage: Stage-1 Spark Edges: @@ -643,7 +788,7 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator + Map Join Operator condition map: Inner Join 0 to 1 condition expressions: @@ -652,16 +797,29 @@ STAGE PLANS: keys: 0 key (type: string) 1 key (type: string) + input vertices: + 1 Map 3 Position of Big Table: 0 + Statistics: Num rows: 29 Data size: 3025 Basic stats: COMPLETE Column stats: NONE + BucketMapJoin: true Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col0 (type: bigint) auto parallelism: false + Local Work: + Map Reduce Local Work + Bucket Mapjoin Context: + Alias Bucket File Name Mapping: +#### A masked pattern was here #### + Alias Bucket Output File Name Mapping: +#### A masked pattern 
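NOTE: the auto_sortmerge_join_7 hunk here keeps bucket locality after conversion: the Map Join Operator is tagged "BucketMapJoin: true", and both the hash-table side and the probe side carry a Bucket Mapjoin Context with alias-to-bucket-file mappings. Representative flags that enable the bucketed variants in these tests (an assumption; the exact preamble is not in this diff):

    set hive.optimize.bucketmapjoin=true;
    set hive.optimize.bucketmapjoin.sortedmerge=true;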
was here #### Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -771,14 +929,17 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_8.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_8.q.out index 198d50d..814d4c4 100644 --- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_8.q.out +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_8.q.out @@ -185,12 +185,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 1 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col0 (type: bigint) auto parallelism: false @@ -303,14 +306,17 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -419,12 +425,15 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) Position of Big Table: 0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col0 (type: bigint) auto parallelism: false @@ -537,14 +546,17 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -625,10 +637,143 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 Stage-0 depends on 
stages: Stage-1 STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work + Bucket Mapjoin Context: + Alias Bucket File Name Mapping: +#### A masked pattern was here #### + Alias Bucket Output File Name Mapping: +#### A masked pattern was here #### + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 114 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 114 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types 
string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [b] + /bucket_small/ds=2008-04-09 [b] + Stage: Stage-1 Spark Edges: @@ -645,7 +790,7 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator + Map Join Operator condition map: Inner Join 0 to 1 condition expressions: @@ -654,16 +799,29 @@ STAGE PLANS: keys: 0 key (type: string) 1 key (type: string) + input vertices: + 1 Map 3 Position of Big Table: 0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + BucketMapJoin: true Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col0 (type: bigint) auto parallelism: false + Local Work: + Map Reduce Local Work + Bucket Mapjoin Context: + Alias Bucket File Name Mapping: +#### A masked pattern was here #### + Alias Bucket Output File Name Mapping: +#### A masked pattern was here #### Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -773,14 +931,17 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out index f59e57f..82a2e34 100644 --- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out @@ -82,12 +82,15 @@ STAGE PLANS: keys: 0 key (type: int) 1 key (type: int) + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: @@ -95,11 +98,14 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE 
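NOTE: auto_sortmerge_join_8 follows the same pattern with bucket_count 2 and two small-side partitions. Since the golden files assert query results as well as plan shape, a quick equivalence check is to run the same count with the conversion toggled off and on; a sketch assuming the bucket_big/bucket_small pair:

    set hive.auto.convert.sortmerge.join.to.mapjoin=false;
    select count(*) from bucket_big a join bucket_small b on a.key = b.key;

    set hive.auto.convert.sortmerge.join.to.mapjoin=true;
    select count(*) from bucket_big a join bucket_small b on a.key = b.key;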
File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -171,15 +177,18 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reducer 2 Reduce Operator Tree: @@ -188,11 +197,14 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -280,15 +292,18 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reducer 2 Reduce Operator Tree: @@ -297,13 +312,17 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Reducer 3 Reduce Operator Tree: @@ -311,11 +330,14 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -391,19 +413,18 @@ join on src1.key = src2.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-2 is a root stage + 
Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 3) - Reducer 5 <- Map 4 (GROUP, 3) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 3), Reducer 5 (PARTITION-LEVEL SORT, 3) + Reducer 4 <- Map 3 (GROUP, 3) #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: a @@ -421,17 +442,44 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Map 4 + Reducer 4 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} + 1 {_col1} + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 3) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan alias: a @@ -449,58 +497,53 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reducer 2 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col3 - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) + 
Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {_col1} + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Reducer 4 + Statistics: Num rows: 5 Data size: 41 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 41 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 41 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -602,12 +645,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: @@ -615,11 +661,14 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -710,12 +759,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 key (type: int) + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: @@ -723,11 +775,14 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -842,12 +897,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num 
rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: @@ -855,11 +913,14 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -964,12 +1025,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: @@ -977,11 +1041,14 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1193,12 +1260,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 key (type: int) + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: @@ -1206,11 +1276,14 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1281,12 +1354,15 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: @@ -1294,11 +1370,14 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial 
outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1386,12 +1465,15 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) 2 _col0 (type: int) + Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: @@ -1399,11 +1481,14 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1502,12 +1587,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 key (type: int) + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Reducer 2 Reduce Operator Tree: @@ -1515,11 +1603,14 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1575,10 +1666,33 @@ select count(*) from ( ) subq1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + Stage: Stage-1 Spark Edges: @@ -1593,7 +1707,7 @@ STAGE PLANS: 
Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator + Map Join Operator condition map: Inner Join 0 to 1 condition expressions: @@ -1602,24 +1716,34 @@ STAGE PLANS: keys: 0 key (type: int) 1 key (type: int) + input vertices: + 1 Map 3 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1663,10 +1787,33 @@ select key, count(*) from group by key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + Stage: Stage-1 Spark Edges: @@ -1681,7 +1828,7 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator + Map Join Operator condition map: Inner Join 0 to 1 condition expressions: @@ -1691,16 +1838,23 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0 + input vertices: + 1 Map 3 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 2 Reduce Operator Tree: Group By Operator @@ -1708,11 +1862,14 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 15 Basic stats: 
COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1771,10 +1928,33 @@ select count(*) from ) subq2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + Stage: Stage-1 Spark Edges: @@ -1790,7 +1970,7 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator + Map Join Operator condition map: Inner Join 0 to 1 condition expressions: @@ -1800,16 +1980,23 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 2 Reduce Operator Tree: Group By Operator @@ -1817,13 +2004,17 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE Select Operator + Statistics: Num rows: 2 Data size: 15 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Reducer 3 Reduce Operator Tree: @@ -1831,11 +2022,14 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -1911,19 +2105,42 @@ join on src1.key = src2.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-2, Stage-4 + Stage-4 
depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 3) Reducer 5 <- Map 4 (GROUP, 3) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 3), Reducer 5 (PARTITION-LEVEL SORT, 3) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: a @@ -1931,7 +2148,7 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator + Map Join Operator condition map: Inner Join 0 to 1 condition expressions: @@ -1941,17 +2158,48 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0 + input vertices: + 1 Map 6 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Map 4 + Local Work: + Map Reduce Local Work + Reducer 5 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {_col0} {_col1} + 1 {_col1} + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 3) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan alias: a @@ -1959,7 +2207,7 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator + Map Join Operator condition map: Inner Join 0 to 1 condition expressions: @@ -1969,67 +2217,88 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0 + input vertices: + 1 Map 3 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 2 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 
(type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {KEY.reducesinkkey0} {VALUE._col0} - 1 {VALUE._col0} - outputColumnNames: _col0, _col1, _col3 - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select src1.key, src1.cnt1, src2.cnt1 from -( + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} {_col1} + 1 {_col1} + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Reducer 5 + Statistics: Num rows: 5 Data size: 41 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 41 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 41 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 {key} + 1 + keys: + 0 key (type: int) + 1 key (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select src1.key, src1.cnt1, src2.cnt1 from +( select key, count(*) as cnt1 from ( select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key @@ -2091,10 +2360,37 @@ select count(*) from on subq1.key = subq2.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: 
boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Local Work: + Map Reduce Local Work + Stage: Stage-1 Spark Edges: @@ -2113,7 +2409,7 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator + Map Join Operator condition map: Inner Join 0 to 1 condition expressions: @@ -2122,24 +2418,34 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + input vertices: + 1 Map 3 + Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2160,6 +2466,8 @@ PREHOOK: type: QUERY PREHOOK: Input: default@tbl1 PREHOOK: Input: default@tbl2 #### A masked pattern was here #### +Status: Failed +Status: Failed POSTHOOK: query: select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join @@ -2169,7 +2477,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tbl1 POSTHOOK: Input: default@tbl2 #### A masked pattern was here #### -20 PREHOOK: query: -- The subquery itself is being joined. 
Since the sub-query only contains selects and filters, it should -- be converted to a sort-merge join, although there is more than one level of sub-query explain @@ -2199,14 +2506,13 @@ select count(*) from on subq2.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2221,33 +2527,67 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 + Spark HashTable Sink Operator condition expressions: 0 1 keys: 0 _col0 (type: int) 1 key (type: int) - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) - Reducer 2 + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + input vertices: + 0 Map 1 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2273,6 +2613,8 @@ PREHOOK: type: QUERY PREHOOK: Input: default@tbl1 PREHOOK: Input: default@tbl2 #### A masked pattern was here #### +Status: Failed +Status: Failed POSTHOOK: query: select count(*) from ( select * from @@ -2287,7 +2629,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tbl1 POSTHOOK: Input: default@tbl2 #### A masked pattern was here #### -20 PREHOOK: query: -- Both the tables are nested sub-queries i.e more then 1 level of sub-query. 
-- The join should be converted to a sort-merge join explain @@ -2331,10 +2672,37 @@ select count(*) from on subq2.key = subq4.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Local Work: + Map Reduce Local Work + Stage: Stage-1 Spark Edges: @@ -2353,7 +2721,7 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator + Map Join Operator condition map: Inner Join 0 to 1 condition expressions: @@ -2362,24 +2730,34 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + input vertices: + 1 Map 3 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2411,6 +2789,8 @@ PREHOOK: query: select count(*) from PREHOOK: type: QUERY PREHOOK: Input: default@tbl1 #### A masked pattern was here #### +Status: Failed +Status: Failed POSTHOOK: query: select count(*) from ( select * from @@ -2431,7 +2811,6 @@ POSTHOOK: query: select count(*) from POSTHOOK: type: QUERY POSTHOOK: Input: default@tbl1 #### A masked pattern was here #### -20 PREHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters and the join key -- is not getting modified, it should be converted to a sort-merge join. Note that the sub-query modifies one -- item, but that is not part of the join key. 
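Under the Spark engine these plans no longer contain a Sorted Merge Bucket Map Join Operator: the small-table side is materialized in its own stage through a Spark HashTable Sink Operator, and the big-table stage then applies a Map Join Operator against it. A minimal sketch of the session settings and query that yield a plan of this shape — the option names are standard Hive configs, but treating these exact values as this test's configuration is an assumption, and the second subquery is reconstructed by symmetry from the partially masked POSTHOOK echo below:

    -- sketch: assumed session settings behind the Spark map-join conversion shown in these hunks
    set hive.execution.engine=spark;
    set hive.auto.convert.sortmerge.join=true;
    set hive.auto.convert.sortmerge.join.to.mapjoin=true;
    set hive.optimize.bucketmapjoin.sortedmerge=true;

    -- tbl1/tbl2 are the bucketed, sorted test tables referenced by default@tbl1/default@tbl2 above
    explain
    select count(*) from
      (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1
      join
      (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2
      on subq1.key = subq2.key;

With these settings the optimizer prefers the broadcast-style Map Join (hash table built in a separate Spark stage) over the sort-merge path, which is why the expected STAGE DEPENDENCIES in this file gain an extra root stage feeding Stage-1.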
@@ -2453,10 +2832,37 @@ select count(*) from on subq1.key = subq2.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 8) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Local Work: + Map Reduce Local Work + Stage: Stage-1 Spark Edges: @@ -2475,7 +2881,7 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator + Map Join Operator condition map: Inner Join 0 to 1 condition expressions: @@ -2484,24 +2890,34 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) + input vertices: + 1 Map 3 + Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2522,6 +2938,8 @@ PREHOOK: type: QUERY PREHOOK: Input: default@tbl1 PREHOOK: Input: default@tbl2 #### A masked pattern was here #### +Status: Failed +Status: Failed POSTHOOK: query: select count(*) from (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1 join @@ -2531,7 +2949,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tbl1 POSTHOOK: Input: default@tbl2 #### A masked pattern was here #### -20 PREHOOK: query: -- The left table is a sub-query and the right table is not. -- It should be converted to a sort-merge join. 
explain @@ -2547,14 +2964,13 @@ select count(*) from join tbl2 a on subq1.key = a.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2569,33 +2985,67 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 + Spark HashTable Sink Operator condition expressions: 0 1 keys: 0 _col0 (type: int) 1 key (type: int) - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) - Reducer 2 + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + input vertices: + 0 Map 1 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2614,6 +3064,8 @@ PREHOOK: type: QUERY PREHOOK: Input: default@tbl1 PREHOOK: Input: default@tbl2 #### A masked pattern was here #### +Status: Failed +Status: Failed POSTHOOK: query: select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join tbl2 a on subq1.key = a.key @@ -2621,7 +3073,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tbl1 POSTHOOK: Input: default@tbl2 #### A masked pattern was here #### -20 PREHOOK: query: -- The right table is a sub-query and the left table is not. -- It should be converted to a sort-merge join. 
explain @@ -2639,17 +3090,44 @@ select count(*) from tbl1 a on a.key = subq1.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + Local Work: + Map Reduce Local Work + Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: a @@ -2657,7 +3135,7 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator + Map Join Operator condition map: Inner Join 0 to 1 condition expressions: @@ -2666,24 +3144,34 @@ STAGE PLANS: keys: 0 key (type: int) 1 _col0 (type: int) + input vertices: + 1 Map 1 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 2 + Local Work: + Map Reduce Local Work + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2703,6 +3191,8 @@ PREHOOK: type: QUERY PREHOOK: Input: default@tbl1 PREHOOK: Input: default@tbl2 #### A masked pattern was here #### +Status: Failed +Status: Failed POSTHOOK: query: select count(*) from tbl1 a join (select a.key as key, a.value as value from tbl2 a where key < 6) subq1 @@ -2711,7 +3201,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tbl1 POSTHOOK: Input: default@tbl2 #### A masked pattern was here #### -20 PREHOOK: query: -- There are more than 2 inputs to the join, all of them being sub-queries. 
-- It should be converted to to a sort-merge join explain @@ -2737,10 +3226,62 @@ select count(*) from on (subq1.key = subq3.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + Local Work: + Map Reduce Local Work + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + condition expressions: + 0 + 1 + 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + Local Work: + Map Reduce Local Work + Stage: Stage-1 Spark Edges: @@ -2759,7 +3300,7 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator + Map Join Operator condition map: Inner Join 0 to 1 Inner Join 0 to 2 @@ -2771,24 +3312,35 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) 2 _col0 (type: int) + input vertices: + 1 Map 3 + 2 Map 4 + Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2812,6 +3364,8 @@ PREHOOK: type: QUERY PREHOOK: Input: default@tbl1 PREHOOK: Input: default@tbl2 #### A masked pattern was here #### +Status: Failed +Status: Failed POSTHOOK: query: select count(*) from (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 join @@ -2824,7 +3378,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tbl1 POSTHOOK: Input: default@tbl2 #### A masked pattern was here #### -56 PREHOOK: query: -- The join is 
being performed on a nested sub-query, and an aggregation is performed after that. -- The join should be converted to a sort-merge join explain @@ -2856,14 +3409,13 @@ join tbl2 b on subq2.key = b.key) a POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2878,33 +3430,67 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 + Spark HashTable Sink Operator condition expressions: 0 1 keys: 0 _col0 (type: int) 1 key (type: int) - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) - Reducer 2 + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + input vertices: + 0 Map 1 + Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/spark/parquet_join.q.out ql/src/test/results/clientpositive/spark/parquet_join.q.out index 240989a..3bcdef2 100644 --- ql/src/test/results/clientpositive/spark/parquet_join.q.out +++ ql/src/test/results/clientpositive/spark/parquet_join.q.out @@ -319,11 +319,14 @@ STAGE PLANS: 0 key (type: int) 1 key (type: int) outputColumnNames: _col1, _col7 + Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string), _col7 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output 
format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_17.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_17.q.out
index 268ae23..6293450 100644
--- ql/src/test/results/clientpositive/spark/smb_mapjoin_17.q.out
+++ ql/src/test/results/clientpositive/spark/smb_mapjoin_17.q.out
@@ -181,138 +181,237 @@ JOIN test_table6 f ON a.key = f.key
 JOIN test_table7 g ON a.key = g.key
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-1 is a root stage
+  Stage-2 is a root stage
+  Stage-1 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
-  Stage: Stage-1
+  Stage: Stage-2
     Spark
-      Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3), Map 5 (PARTITION-LEVEL SORT, 3), Map 6 (PARTITION-LEVEL SORT, 3), Map 7 (PARTITION-LEVEL SORT, 3), Map 8 (PARTITION-LEVEL SORT, 3), Map 9 (PARTITION-LEVEL SORT, 3)
-        Reducer 3 <- Reducer 2 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 1
+        Map 3
            Map Operator Tree:
                TableScan
-                  alias: a
+                  alias: b
                  Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: int)
-                      sort order: +
-                      Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE
+                    Spark HashTable Sink Operator
+                      condition expressions:
+                        0
+                        1
+                        2
+                        3
+                        4
+                        5
+                        6
+                      keys:
+                        0 key (type: int)
+                        1 key (type: int)
+                        2 key (type: int)
+                        3 key (type: int)
+                        4 key (type: int)
+                        5 key (type: int)
+                        6 key (type: int)
+            Local Work:
+              Map Reduce Local Work
         Map 4
            Map Operator Tree:
                TableScan
-                  alias: b
+                  alias: c
                  Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: int)
-                      sort order: +
-                      Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE
+                    Spark HashTable Sink Operator
+                      condition expressions:
+                        0
+                        1
+                        2
+                        3
+                        4
+                        5
+                        6
+                      keys:
+                        0 key (type: int)
+                        1 key (type: int)
+                        2 key (type: int)
+                        3 key (type: int)
+                        4 key (type: int)
+                        5 key (type: int)
+                        6 key (type: int)
+            Local Work:
+              Map Reduce Local Work
         Map 5
            Map Operator Tree:
                TableScan
-                  alias: c
+                  alias: d
                  Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: int)
-                      sort order: +
-                      Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE
+                    Spark HashTable Sink Operator
+                      condition expressions:
+                        0
+                        1
+                        2
+                        3
+                        4
+                        5
+                        6
+                      keys:
+                        0 key (type: int)
+                        1 key (type: int)
+                        2 key (type: int)
+                        3 key (type: int)
+                        4 key (type: int)
+                        5 key (type: int)
+                        6 key (type: int)
+            Local Work:
+              Map Reduce Local Work
         Map 6
            Map Operator Tree:
                TableScan
-                  alias: d
+                  alias: e
                  Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: int)
-                      sort order: +
-                      Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE
+                    Spark HashTable Sink Operator
+                      condition expressions:
+                        0
+                        1
+                        2
+                        3
+                        4
+                        5
+                        6
+                      keys:
+                        0 key (type: int)
+                        1 key (type: int)
+                        2 key (type: int)
+                        3 key (type: int)
+                        4 key (type: int)
+                        5 key (type: int)
+                        6 key (type: int)
+            Local Work:
+              Map Reduce Local Work
         Map 7
            Map Operator Tree:
                TableScan
-                  alias: e
+                  alias: f
                  Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: int)
-                      sort order: +
-                      Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE
+                    Spark HashTable Sink Operator
+                      condition expressions:
+                        0
+                        1
+                        2
+                        3
+                        4
+                        5
+                        6
+                      keys:
+                        0 key (type: int)
+                        1 key (type: int)
+                        2 key (type: int)
+                        3 key (type: int)
+                        4 key (type: int)
+                        5 key (type: int)
+                        6 key (type: int)
+            Local Work:
+              Map Reduce Local Work
         Map 8
            Map Operator Tree:
                TableScan
-                  alias: f
+                  alias: g
                  Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: int)
-                      sort order: +
-                      Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE
-        Map 9
+                    Spark HashTable Sink Operator
+                      condition expressions:
+                        0
+                        1
+                        2
+                        3
+                        4
+                        5
+                        6
+                      keys:
+                        0 key (type: int)
+                        1 key (type: int)
+                        2 key (type: int)
+                        3 key (type: int)
+                        4 key (type: int)
+                        5 key (type: int)
+                        6 key (type: int)
+            Local Work:
+              Map Reduce Local Work
+
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
            Map Operator Tree:
                TableScan
-                  alias: g
+                  alias: a
                  Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: key (type: int)
-                      sort order: +
-                      Map-reduce partition columns: key (type: int)
-                      Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE
+                    Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                           Inner Join 0 to 2
+                           Inner Join 0 to 3
+                           Inner Join 0 to 4
+                           Inner Join 0 to 5
+                           Inner Join 0 to 6
+                      condition expressions:
+                        0
+                        1
+                        2
+                        3
+                        4
+                        5
+                        6
+                      keys:
+                        0 key (type: int)
+                        1 key (type: int)
+                        2 key (type: int)
+                        3 key (type: int)
+                        4 key (type: int)
+                        5 key (type: int)
+                        6 key (type: int)
+                      input vertices:
+                        1 Map 3
+                        2 Map 4
+                        3 Map 5
+                        4 Map 6
+                        5 Map 7
+                        6 Map 8
+                      Statistics: Num rows: 33 Data size: 231 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: count()
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order:
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: bigint)
+            Local Work:
+              Map Reduce Local Work
         Reducer 2
            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                     Inner Join 0 to 2
-                     Inner Join 0 to 3
-                     Inner Join 0 to 4
-                     Inner Join 0 to 5
-                     Inner Join 0 to 6
-                condition expressions:
-                  0
-                  1
-                  2
-                  3
-                  4
-                  5
-                  6
-                Statistics: Num rows: 33 Data size: 231 Basic stats: COMPLETE Column stats: NONE
-                Group By Operator
-                  aggregations: count()
-                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    sort order:
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col0 (type: bigint)
-        Reducer 3
-            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
                mode: mergepartial
@@ -431,12 +530,15 @@ STAGE PLANS:
                         4 key (type: int)
                         5 key (type: int)
                         6 key (type: int)
+                      Statistics: Num rows: 66 Data size: 462 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         aggregations: count()
                         mode: hash
                         outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                         Reduce Output Operator
                           sort order:
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                           value expressions: _col0 (type: bigint)
         Reducer 2
            Reduce Operator Tree:
@@ -444,11 +546,14 @@ STAGE PLANS:
               aggregations: count(VALUE._col0)
               mode: mergepartial
               outputColumnNames: _col0
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: _col0 (type: bigint)
                 outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -556,12 +661,15 @@ STAGE PLANS:
                         5 key (type: int)
                         6 key (type: int)
                         7 key (type: int)
+                      Statistics: Num rows: 77 Data size: 539 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         aggregations: count()
                         mode: hash
                         outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                         Reduce Output Operator
                           sort order:
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                           value expressions: _col0 (type: bigint)
         Reducer 2
            Reduce Operator Tree:
@@ -569,11 +677,14 @@ STAGE PLANS:
               aggregations: count(VALUE._col0)
               mode: mergepartial
               outputColumnNames: _col0
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: _col0 (type: bigint)
                 outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -671,14 +782,98 @@ LEFT OUTER JOIN test_table7 s ON a.key = s.key
 LEFT OUTER JOIN test_table8 t ON a.key = t.key
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-1 is a root stage
+  Stage-2 is a root stage
+  Stage-1 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
+  Stage: Stage-2
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 2
+            Map Operator Tree:
+                TableScan
+                  alias: q
+                  Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                  Spark HashTable Sink Operator
+                    condition expressions:
+                      0 {_col0} {_col1}
+                      1
+                      2
+                      3
+                      4
+                    keys:
+                      0 _col0 (type: int)
+                      1 key (type: int)
+                      2 key (type: int)
+                      3 key (type: int)
+                      4 key (type: int)
+            Local Work:
+              Map Reduce Local Work
+        Map 3
+            Map Operator Tree:
+                TableScan
+                  alias: r
+                  Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                  Spark HashTable Sink Operator
+                    condition expressions:
+                      0 {_col0} {_col1}
+                      1
+                      2
+                      3
+                      4
+                    keys:
+                      0 _col0 (type: int)
+                      1 key (type: int)
+                      2 key (type: int)
+                      3 key (type: int)
+                      4 key (type: int)
+            Local Work:
+              Map Reduce Local Work
+        Map 4
+            Map Operator Tree:
+                TableScan
+                  alias: s
+                  Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                  Spark HashTable Sink Operator
+                    condition expressions:
+                      0 {_col0} {_col1}
+                      1
+                      2
+                      3
+                      4
+                    keys:
+                      0 _col0 (type: int)
+                      1 key (type: int)
+                      2 key (type: int)
+                      3 key (type: int)
+                      4 key (type: int)
+            Local Work:
+              Map Reduce Local Work
+        Map 5
+            Map Operator Tree:
+                TableScan
+                  alias: t
+                  Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                  Spark HashTable Sink Operator
+                    condition expressions:
+                      0 {_col0} {_col1}
+                      1
+                      2
+                      3
+                      4
+                    keys:
+                      0 _col0 (type: int)
+                      1 key (type: int)
+                      2 key (type: int)
+                      3 key (type: int)
+                      4 key (type: int)
+            Local Work:
+              Map Reduce Local Work
+
   Stage: Stage-1
     Spark
-      Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 3), Map 3 (PARTITION-LEVEL SORT, 3), Map 4 (PARTITION-LEVEL SORT, 3), Map 5 (PARTITION-LEVEL SORT, 3), Map 6 (PARTITION-LEVEL SORT, 3)
 #### A masked pattern was here ####
       Vertices:
         Map 1
@@ -738,75 +933,45 @@ STAGE PLANS:
                         14 key (type: int)
                         15 key (type: int)
                       outputColumnNames: _col0, _col1
-                      Reduce Output Operator
-                        key expressions: _col0 (type: int)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        value expressions: _col1 (type: string)
-        Map 3
-            Map Operator Tree:
-                TableScan
-                  alias: q
-                  Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: key (type: int)
-                    sort order: +
-                    Map-reduce partition columns: key (type: int)
-                    Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
-        Map 4
-            Map Operator Tree:
-                TableScan
-                  alias: r
-                  Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: key (type: int)
-                    sort order: +
-                    Map-reduce partition columns: key (type: int)
-                    Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
-        Map 5
-            Map Operator Tree:
-                TableScan
-                  alias: s
-                  Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: key (type: int)
-                    sort order: +
-                    Map-reduce partition columns: key (type: int)
-                    Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
-        Map 6
-            Map Operator Tree:
-                TableScan
-                  alias: t
-                  Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
-                  Reduce Output Operator
-                    key expressions: key (type: int)
-                    sort order: +
-                    Map-reduce partition columns: key (type: int)
-                    Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
-        Reducer 2
-            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Left Outer Join0 to 1
-                     Left Outer Join0 to 2
-                     Left Outer Join0 to 3
-                     Left Outer Join0 to 4
-                condition expressions:
-                  0 {KEY.reducesinkkey0} {VALUE._col0}
-                  1
-                  2
-                  3
-                  4
-                outputColumnNames: _col0, _col1
-                Select Operator
-                  expressions: _col0 (type: int), _col1 (type: string)
-                  outputColumnNames: _col0, _col1
-                  File Output Operator
-                    compressed: false
-                    table:
-                        input format: org.apache.hadoop.mapred.TextInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      Statistics: Num rows: 165 Data size: 1155 Basic stats: COMPLETE Column stats: NONE
+                      Map Join Operator
+                        condition map:
+                             Left Outer Join0 to 1
+                             Left Outer Join0 to 2
+                             Left Outer Join0 to 3
+                             Left Outer Join0 to 4
+                        condition expressions:
+                          0 {_col0} {_col1}
+                          1
+                          2
+                          3
+                          4
+                        keys:
+                          0 _col0 (type: int)
+                          1 key (type: int)
+                          2 key (type: int)
+                          3 key (type: int)
+                          4 key (type: int)
+                        outputColumnNames: _col0, _col1
+                        input vertices:
+                          1 Map 2
+                          2 Map 3
+                          3 Map 4
+                          4 Map 5
+                        Statistics: Num rows: 726 Data size: 5082 Basic stats: COMPLETE Column stats: NONE
+                        Select Operator
+                          expressions: _col0 (type: int), _col1 (type: string)
+                          outputColumnNames: _col0, _col1
+                          Statistics: Num rows: 726 Data size: 5082 Basic stats: COMPLETE Column stats: NONE
+                          File Output Operator
+                            compressed: false
+                            Statistics: Num rows: 726 Data size: 5082 Basic stats: COMPLETE Column stats: NONE
+                            table:
+                                input format: org.apache.hadoop.mapred.TextInputFormat
+                                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Local Work:
+              Map Reduce Local Work
 
   Stage: Stage-0
     Fetch Operator
diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_25.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_25.q.out
index df66cc2..46a9f5c 100644
--- ql/src/test/results/clientpositive/spark/smb_mapjoin_25.q.out
+++ ql/src/test/results/clientpositive/spark/smb_mapjoin_25.q.out
@@ -201,24 +201,23 @@ explain select * from (select a.key from smb_bucket_1 a join smb_bucket_2 b on (a.key = b.key) where a.key = 5) t1 left outer join (select c.key from smb_bucket_2 c join smb_bucket_3 d on (c.key = d.key) where c.key=5) t2 on (t1.key=t2.key) where t2.key=5
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-1 is a root stage
+  Stage-2 is a root stage
+  Stage-1 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
-  Stage: Stage-1
+  Stage: Stage-2
     Spark
-      Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 1
+        Map 2
            Map Operator Tree:
                TableScan
-                  alias: a
-                  Statistics: Num rows: 52 Data size: 208 Basic stats: COMPLETE Column stats: NONE
+                  alias: c
+                  Statistics: Num rows: 51 Data size: 206 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
                    predicate: (key = 5) (type: boolean)
-                    Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE
                    Sorted Merge Bucket Map Join Operator
                      condition map:
                           Inner Join 0 to 1
@@ -228,21 +227,33 @@ STAGE PLANS:
                       keys:
                         0 5 (type: int)
                         1 5 (type: int)
+                      Statistics: Num rows: 29 Data size: 118 Basic stats: COMPLETE Column stats: NONE
                       Select Operator
                         expressions: 5 (type: int)
                         outputColumnNames: _col0
-                        Reduce Output Operator
-                          key expressions: _col0 (type: int)
-                          sort order: +
-                          Map-reduce partition columns: _col0 (type: int)
-        Map 3
+                        Statistics: Num rows: 29 Data size: 118 Basic stats: COMPLETE Column stats: NONE
+                        Spark HashTable Sink Operator
+                          condition expressions:
+                            0 {_col0}
+                            1
+                          keys:
+                            0 _col0 (type: int)
+                            1 _col0 (type: int)
+            Local Work:
+              Map Reduce Local Work
+
+  Stage: Stage-1
+    Spark
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
             Map Operator Tree:
                 TableScan
-                  alias: c
-                  Statistics: Num rows: 51 Data size: 206 Basic stats: COMPLETE Column stats: NONE
+                  alias: a
+                  Statistics: Num rows: 52 Data size: 208 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
                    predicate: (key = 5) (type: boolean)
-                    Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: NONE
                    Sorted Merge Bucket Map Join Operator
                      condition map:
                           Inner Join 0 to 1
@@ -252,33 +263,40 @@ STAGE PLANS:
                       keys:
                         0 5 (type: int)
                         1 5 (type: int)
+                      Statistics: Num rows: 28 Data size: 114 Basic stats: COMPLETE Column stats: NONE
                       Select Operator
                         expressions: 5 (type: int)
                         outputColumnNames: _col0
-                        Reduce Output Operator
-                          key expressions: _col0 (type: int)
-                          sort order: +
-                          Map-reduce partition columns: _col0 (type: int)
-        Reducer 2
-            Reduce Operator Tree:
-              Join Operator
-                condition map:
-                     Left Outer Join0 to 1
-                condition expressions:
-                  0 {KEY.reducesinkkey0}
-                  1 {KEY.reducesinkkey0}
-                outputColumnNames: _col0, _col1
-                Filter Operator
-                  predicate: (_col1 = 5) (type: boolean)
-                  Select Operator
-                    expressions: _col0 (type: int), 5 (type: int)
-                    outputColumnNames: _col0, _col1
-                    File Output Operator
-                      compressed: false
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        Statistics: Num rows: 28 Data size: 114 Basic stats: COMPLETE Column stats: NONE
+                        Map Join Operator
+                          condition map:
+                               Left Outer Join0 to 1
+                          condition expressions:
+                            0 {_col0}
+                            1 {_col0}
+                          keys:
+                            0 _col0 (type: int)
+                            1 _col0 (type: int)
+                          outputColumnNames: _col0, _col1
+                          input vertices:
+                            1 Map 2
+                          Statistics: Num rows: 31 Data size: 129 Basic stats: COMPLETE Column stats: NONE
+                          Filter Operator
+                            predicate: (_col1 = 5) (type: boolean)
+                            Statistics: Num rows: 15 Data size: 62 Basic stats: COMPLETE Column stats: NONE
+                            Select Operator
+                              expressions: _col0 (type: int), 5 (type: int)
+                              outputColumnNames: _col0, _col1
+                              Statistics: Num rows: 15 Data size: 62 Basic stats: COMPLETE Column stats: NONE
+                              File Output Operator
+                                compressed: false
+                                Statistics: Num rows: 15 Data size: 62 Basic stats: COMPLETE Column stats: NONE
+                                table:
+                                    input format: org.apache.hadoop.mapred.TextInputFormat
+                                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Local Work:
+              Map Reduce Local Work
 
   Stage: Stage-0
     Fetch Operator