diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties
index f583aaf..6938921 100644
--- itests/src/test/resources/testconfiguration.properties
+++ itests/src/test/resources/testconfiguration.properties
@@ -498,6 +498,7 @@ spark.query.files=add_part_multiple.q, \
   auto_join22.q, \
   auto_join23.q, \
   auto_join24.q, \
+  auto_join25.q, \
   auto_join26.q, \
   auto_join27.q, \
   auto_join28.q, \
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkMapJoinResolver.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkMapJoinResolver.java
index 69004dc..e336c40 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkMapJoinResolver.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkMapJoinResolver.java
@@ -285,6 +285,23 @@ private SparkTask createSparkTask(SparkTask originalTask,
     return resultTask;
   }
 
+  private void updateBackupTask(Task<? extends Serializable> task) {
+    if (task.getBackupTask() != null) {
+      updateBackupTask(task, task.getBackupTask(), task.getBackupChildrenTasks());
+    }
+  }
+
+  private void updateBackupTask(Task<? extends Serializable> task,
+      Task<? extends Serializable> backupTask,
+      List<Task<? extends Serializable>> backupChildrenTasks) {
+    task.setBackupTask(backupTask);
+    task.setBackupChildrenTasks(backupChildrenTasks);
+    if (task.getParentTasks() != null) {
+      for (Task<? extends Serializable> parentTask : task.getParentTasks()) {
+        updateBackupTask(parentTask, backupTask, backupChildrenTasks);
+      }
+    }
+  }
+
   @Override
   public Object dispatch(Node nd, Stack<Node> stack, Object... nos)
       throws SemanticException {
@@ -339,6 +356,8 @@ private void processCurrentTask(SparkTask sparkTask, ConditionalTask conditional
     for (SparkWork work : dependencyGraph.keySet()) {
       createSparkTask(sparkTask, work, createdTaskMap, conditionalTask);
     }
+
+    updateBackupTask(sparkTask);
   }
 
   /**
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/StageIDsRearranger.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/StageIDsRearranger.java
index 79c3e02..4c4c416 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/StageIDsRearranger.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/StageIDsRearranger.java
@@ -167,7 +167,14 @@ protected void rejected(Task<? extends Serializable> child) {
       if (task instanceof ConditionalTask) {
         return ((ConditionalTask) task).getListTasks();
       }
-      return task.getChildTasks();
+      List<Task<? extends Serializable>> result = new ArrayList<Task<? extends Serializable>>();
+      if (task.getChildTasks() != null) {
+        result.addAll(task.getChildTasks());
+      }
+      if (task.getBackupTask() != null) {
+        result.add(task.getBackupTask());
+      }
+      return result;
     }
   }
 }
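The two updateBackupTask overloads above take the backup-task link attached to a task and copy it to every ancestor in the task graph, so that after SparkMapJoinResolver splits one SparkTask into several dependent tasks, whichever piece fails at runtime can still reach the common fallback plan. A minimal, self-contained sketch of that upward propagation, using a hypothetical PlanTask stand-in for Hive's Task<? extends Serializable>:

    import java.util.ArrayList;
    import java.util.List;

    // Hypothetical stand-in for Hive's Task<? extends Serializable>.
    class PlanTask {
      final List<PlanTask> parents = new ArrayList<PlanTask>();
      PlanTask backup;  // plan to fall back to if this task fails at runtime

      // Mirror of SparkMapJoinResolver.updateBackupTask above: attach the same
      // backup to this task and, recursively, to every ancestor.
      static void propagateBackup(PlanTask task, PlanTask backup) {
        task.backup = backup;
        for (PlanTask parent : task.parents) {
          propagateBackup(parent, backup);
        }
      }
    }

The StageIDsRearranger change serves the same feature: backup tasks are not regular children, so the stage-ID traversal has to include task.getBackupTask() explicitly or the backup stages would never be numbered.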
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkJoinOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkJoinOptimizer.java
index d57ceff..f64bf34 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkJoinOptimizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkJoinOptimizer.java
@@ -43,7 +43,8 @@ public SparkJoinOptimizer(ParseContext procCtx) {
   }
 
   @Override
-  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException {
+  public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+      Object... nodeOutputs) throws SemanticException {
     OptimizeSparkProcContext context = (OptimizeSparkProcContext) procCtx;
     HiveConf conf = context.getConf();
 
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
index 9ff47c7..31f0d36 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkMapJoinOptimizer.java
@@ -33,7 +33,9 @@
 import org.apache.hadoop.hive.ql.exec.MuxOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.UnionOperator;
+import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
 import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
@@ -69,6 +71,7 @@
       throws SemanticException {
 
     OptimizeSparkProcContext context = (OptimizeSparkProcContext) procCtx;
+    ParseContext parseCtx = context.getParseContext();
     HiveConf conf = context.getConf();
 
     JoinOperator joinOp = (JoinOperator) nd;
@@ -84,6 +87,19 @@
       return null;
     }
 
+    // For creating a backup task for MJ, we need to save the replaced operators (MJ, RS, etc.).
+    // They are needed for creating the backup task at a later stage.
+    if (parseCtx.getClonedRootsForBackupTask() == null) {
+      List<Operator<? extends OperatorDesc>> roots =
+          new ArrayList<Operator<? extends OperatorDesc>>(parseCtx.getTopOps().values());
+      List<Operator<? extends OperatorDesc>> clonedRoots =
+          Utilities.cloneOperatorTree(parseCtx.getConf(), roots);
+      parseCtx.setClonedRootsForBackupTask(clonedRoots);
+      for (int i = 0; i < roots.size(); i++) {
+        TableScanOperator ts = (TableScanOperator) roots.get(i);
+        TableScanOperator clonedTs = (TableScanOperator) clonedRoots.get(i);
+        clonedTs.getConf().setTableMetadata(ts.getConf().getTableMetadata());
+      }
+    }
+
     int numBuckets = -1;
     List<List<String>> bucketColNames = null;
 
@@ -91,8 +107,7 @@
     MapJoinOperator mapJoinOp = convertJoinMapJoin(joinOp, context, mapJoinConversionPos);
     if (conf.getBoolVar(HiveConf.ConfVars.HIVEOPTBUCKETMAPJOIN)) {
       LOG.info("Check if it can be converted to bucketed map join");
-      numBuckets = convertJoinBucketMapJoin(joinOp, mapJoinOp,
-          context, mapJoinConversionPos);
+      numBuckets = convertJoinBucketMapJoin(joinOp, mapJoinOp, context, mapJoinConversionPos);
       if (numBuckets > 1) {
         LOG.info("Converted to map join with " + numBuckets + " buckets");
         bucketColNames = joinOp.getOpTraits().getBucketColNames();
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkSortMergeJoinFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkSortMergeJoinFactory.java
index 6e0ac38..562ac7c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkSortMergeJoinFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkSortMergeJoinFactory.java
@@ -135,8 +135,8 @@ private static void initSMBJoinPlan(MapWork mapWork,
   }
 
   private static String findAliasId(GenSparkProcContext opProcCtx, TableScanOperator ts) {
-    for (String alias : opProcCtx.topOps.keySet()) {
-      if (opProcCtx.topOps.get(alias) == ts) {
+    for (String alias : opProcCtx.parseContext.getTopOps().keySet()) {
+      if (opProcCtx.parseContext.getTopOps().get(alias) == ts) {
         return alias;
       }
     }
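The cloning above runs before convertJoinMapJoin mutates the operator tree, so the clones still describe the original reduce-side (common) join; they are the raw material for the backup task built later in SparkCompiler. The explicit setTableMetadata call is needed because tableMetadata is transient (see the TableScanDesc change below), and a serialization-based deep copy such as Utilities.cloneOperatorTree drops transient fields. A small self-contained demo of that pitfall, using plain Java serialization in place of Hive's plan cloning:

    import java.io.*;

    public class TransientCloneDemo {
      // Stand-in for TableScanDesc: the table handle is transient.
      static class Desc implements Serializable {
        transient Object tableMetadata = new Object();
      }

      static Desc deepCopy(Desc d) throws Exception {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        ObjectOutputStream oos = new ObjectOutputStream(bos);
        oos.writeObject(d);
        oos.close();
        return (Desc) new ObjectInputStream(
            new ByteArrayInputStream(bos.toByteArray())).readObject();
      }

      public static void main(String[] args) throws Exception {
        Desc original = new Desc();
        Desc clone = deepCopy(original);
        System.out.println(clone.tableMetadata);        // null: transient was dropped
        clone.tableMetadata = original.tableMetadata;   // what the loop above restores
      }
    }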
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
index b838bff..617c598 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
@@ -107,6 +107,11 @@
   private Operator<?> fetchSource;
   private ListSinkOperator fetchSink;
 
+  // For Spark, we create a backup task for the MJ task. To do that, we need
+  // to save the cloned root operators before converting JOIN to MAPJOIN.
+  // They are used later for creating the backup task.
+  private List<Operator<? extends OperatorDesc>> clonedRootsForBackupTask;
+
   public ParseContext() {
   }
 
@@ -600,4 +605,13 @@ public ListSinkOperator getFetchSink() {
   public void setFetchSink(ListSinkOperator fetchSink) {
     this.fetchSink = fetchSink;
   }
+
+  public List<Operator<? extends OperatorDesc>> getClonedRootsForBackupTask() {
+    return clonedRootsForBackupTask;
+  }
+
+  public void setClonedRootsForBackupTask(
+      List<Operator<? extends OperatorDesc>> clonedRootsForBackupTask) {
+    this.clonedRootsForBackupTask = clonedRootsForBackupTask;
+  }
+
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkProcContext.java ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkProcContext.java
index 773cfbd..302bc8c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkProcContext.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkProcContext.java
@@ -134,25 +134,19 @@
   public final Set<FileSinkOperator> fileSinkSet;
   public final Map<FileSinkOperator, List<FileSinkOperator>> fileSinkMap;
 
-  // Alias to operator map, from the semantic analyzer.
-  // This is necessary as sometimes semantic analyzer's mapping is different than operator's own alias.
-  public final Map<String, Operator<? extends OperatorDesc>> topOps;
-
   @SuppressWarnings("unchecked")
   public GenSparkProcContext(HiveConf conf,
       ParseContext parseContext,
       List<Task<MoveWork>> moveTask,
       List<Task<? extends Serializable>> rootTasks,
       Set<ReadEntity> inputs,
-      Set<WriteEntity> outputs,
-      Map<String, Operator<? extends OperatorDesc>> topOps) {
+      Set<WriteEntity> outputs) {
     this.conf = conf;
     this.parseContext = parseContext;
     this.moveTask = moveTask;
     this.rootTasks = rootTasks;
     this.inputs = inputs;
     this.outputs = outputs;
-    this.topOps = topOps;
     this.currentTask = (SparkTask) TaskFactory.get(
         new SparkWork(conf.getVar(HiveConf.ConfVars.HIVEQUERYID)), conf);
     this.rootTasks.add(currentTask);
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/spark/OptimizeSparkProcContext.java ql/src/java/org/apache/hadoop/hive/ql/parse/spark/OptimizeSparkProcContext.java
index f7586a4..3b71af1 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/spark/OptimizeSparkProcContext.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/spark/OptimizeSparkProcContext.java
@@ -20,15 +20,12 @@
 
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
-import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
 import org.apache.hadoop.hive.ql.hooks.WriteEntity;
 import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
 import org.apache.hadoop.hive.ql.parse.ParseContext;
-import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 
-import java.util.Deque;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Map;
@@ -49,19 +46,12 @@
   private final Set<ReduceSinkOperator> visitedReduceSinks = new HashSet<ReduceSinkOperator>();
   private final Map<MapJoinOperator, Long> mjOpSizes = new HashMap<MapJoinOperator, Long>();
 
-  // rootOperators are all the table scan operators in sequence
-  // of traversal
-  private final Deque<Operator<? extends OperatorDesc>> rootOperators;
-
   public OptimizeSparkProcContext(HiveConf conf, ParseContext parseContext,
-      Set<ReadEntity> inputs, Set<WriteEntity> outputs,
-      Deque<Operator<? extends OperatorDesc>> rootOperators) {
-
+      Set<ReadEntity> inputs, Set<WriteEntity> outputs) {
    this.conf = conf;
     this.parseContext = parseContext;
     this.inputs = inputs;
     this.outputs = outputs;
-    this.rootOperators = rootOperators;
   }
 
   public ParseContext getParseContext() {
@@ -84,10 +74,6 @@ public HiveConf getConf() {
     return visitedReduceSinks;
   }
 
-  public Deque<Operator<? extends OperatorDesc>> getRootOperators() {
-    return rootOperators;
-  }
-
   public Map<MapJoinOperator, Long> getMjOpSizes() {
     return mjOpSizes;
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java
index 3a7477a..224a317 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java
@@ -19,10 +19,8 @@
 
 import java.io.Serializable;
 import java.util.ArrayList;
-import java.util.Deque;
 import java.util.HashMap;
 import java.util.LinkedHashMap;
-import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -58,7 +56,6 @@
 import org.apache.hadoop.hive.ql.lib.RuleRegExp;
 import org.apache.hadoop.hive.ql.lib.TypeRule;
 import org.apache.hadoop.hive.ql.log.PerfLogger;
-import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.optimizer.physical.MetadataOnlyOptimizer;
 import org.apache.hadoop.hive.ql.optimizer.physical.NullScanOptimizer;
 import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext;
@@ -82,7 +79,6 @@
 import org.apache.hadoop.hive.ql.plan.MoveWork;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.SparkWork;
-import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
 
 /**
  * SparkCompiler translates the operator plan into SparkTasks.
@@ -101,31 +97,31 @@ public SparkCompiler() {
   protected void optimizeOperatorPlan(ParseContext pCtx, Set<ReadEntity> inputs,
       Set<WriteEntity> outputs) throws SemanticException {
     PERF_LOGGER.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_OPTIMIZE_OPERATOR_TREE);
-    // Sequence of TableScan operators to be walked
-    Deque<Operator<? extends OperatorDesc>> deque = new LinkedList<Operator<? extends OperatorDesc>>();
-    deque.addAll(pCtx.getTopOps().values());
 
-    OptimizeSparkProcContext procCtx = new OptimizeSparkProcContext(conf, pCtx, inputs, outputs, deque);
+    OptimizeSparkProcContext procCtx = new OptimizeSparkProcContext(conf, pCtx, inputs, outputs);
 
     // create a walker which walks the tree in a DFS manner while maintaining
     // the operator stack.
     Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
     opRules.put(new RuleRegExp("Set parallelism - ReduceSink",
-        ReduceSinkOperator.getOperatorName() + "%"),
-        new SetSparkReducerParallelism());
-
-    opRules.put(new TypeRule(JoinOperator.class), new SparkJoinOptimizer(pCtx));
-
-    opRules.put(new TypeRule(MapJoinOperator.class), new SparkJoinHintOptimizer(pCtx));
+        ReduceSinkOperator.getOperatorName() + "%"), new SetSparkReducerParallelism());
 
     // The dispatcher fires the processor corresponding to the closest matching
     // rule and passes the context along
     Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
     GraphWalker ogw = new DefaultGraphWalker(disp);
+    ArrayList<Node> topNodes = new ArrayList<Node>(pCtx.getTopOps().values());
+    ogw.startWalking(topNodes, null);
 
-    // Create a list of topop nodes
-    ArrayList<Node> topNodes = new ArrayList<Node>();
-    topNodes.addAll(pCtx.getTopOps().values());
+    // We need to use two passes, otherwise the cloned RS will not get the number of
+    // reducers set by SetSparkReducerParallelism.
+    opRules.clear();
+    opRules.put(new TypeRule(JoinOperator.class), new SparkJoinOptimizer(pCtx));
+    opRules.put(new TypeRule(MapJoinOperator.class), new SparkJoinHintOptimizer(pCtx));
+
+    disp = new DefaultRuleDispatcher(null, opRules, procCtx);
+    ogw = new DefaultGraphWalker(disp);
     ogw.startWalking(topNodes, null);
+
     PERF_LOGGER.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_OPTIMIZE_OPERATOR_TREE);
   }
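The two-pass structure above matters because of the cloning: SparkJoinOptimizer clones the root operators (ReduceSinkOperators included) the first time it fires, so SetSparkReducerParallelism must have finished annotating every RS before any clone is taken, or the clones would be copied without their reducer counts. The generic shape of this annotate-then-transform pattern, sketched with hypothetical miniature types rather than Hive's walker API:

    import java.util.ArrayList;
    import java.util.List;

    // Hypothetical miniature of Hive's graph walk: pass 1 annotates every node,
    // pass 2 may clone subtrees, so pass 1 must complete first or the clones
    // miss the annotations.
    class PlanNode {
      final List<PlanNode> children = new ArrayList<PlanNode>();
      int numReducers = -1;   // set in pass 1, inherited by clones in pass 2

      interface Visitor { void visit(PlanNode n); }

      void walk(Visitor v) {  // DFS, like Hive's DefaultGraphWalker
        v.visit(this);
        for (PlanNode c : children) {
          c.walk(v);
        }
      }
    }
    // usage: root.walk(setParallelism); then root.walk(convertJoins);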
@@ -137,13 +133,35 @@ protected void generateTaskTree(List<Task<? extends Serializable>> rootTasks, ParseContext pCtx,
       List<Task<MoveWork>> mvTask, Set<ReadEntity> inputs, Set<WriteEntity> outputs)
       throws SemanticException {
     PERF_LOGGER.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_GENERATE_TASK_TREE);
+    GenSparkUtils.getUtils().resetSequenceNumber();
 
-    ParseContext tempParseContext = getParseContext(pCtx, rootTasks);
-    GenSparkWork genSparkWork = new GenSparkWork(GenSparkUtils.getUtils());
+    SparkTask mainTask = generateWorkTree(rootTasks, pCtx, mvTask, inputs, outputs,
+        new ArrayList<Node>(pCtx.getTopOps().values()), true);
+
+    if (pCtx.getClonedRootsForBackupTask() != null) {
+      // Generate the work tree for the backup task
+      SparkTask backupTask = generateWorkTree(rootTasks, pCtx, mvTask, inputs, outputs,
+          new ArrayList<Node>(pCtx.getClonedRootsForBackupTask()), false);
+      rootTasks.remove(backupTask);
+      mainTask.setBackupTask(backupTask);
+      mainTask.setBackupChildrenTasks(mainTask.getChildTasks());
+      backupTask.setChildTasks(mainTask.getChildTasks());
+    }
+
+    PERF_LOGGER.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_GENERATE_TASK_TREE);
+  }
 
+  private SparkTask generateWorkTree(List<Task<? extends Serializable>> rootTasks, ParseContext pCtx,
+      List<Task<MoveWork>> mvTask, Set<ReadEntity> inputs,
+      Set<WriteEntity> outputs, List<Node> topNodes,
+      boolean addFileSink) throws SemanticException {
+    ParseContext tempParseContext = getParseContext(pCtx, rootTasks);
     GenSparkProcContext procCtx = new GenSparkProcContext(
-        conf, tempParseContext, mvTask, rootTasks, inputs, outputs, pCtx.getTopOps());
+        conf, tempParseContext, mvTask, rootTasks, inputs, outputs);
+
+    GenSparkUtils utils = GenSparkUtils.getUtils();
+    GenSparkWork genSparkWork = new GenSparkWork(utils);
 
     // create a walker which walks the tree in a DFS manner while maintaining
     // the operator stack. The dispatcher generates the plan from the operator tree
@@ -153,12 +171,14 @@
     opRules.put(new TypeRule(MapJoinOperator.class), new SparkReduceSinkMapJoinProc());
 
+    NodeProcessor fileSinkProc = addFileSink ?
+        new CompositeProcessor(new SparkFileSinkProcessor(), genSparkWork) : genSparkWork;
+
     opRules.put(new RuleRegExp("Split Work + Move/Merge - FileSink",
-        FileSinkOperator.getOperatorName() + "%"),
-        new CompositeProcessor(new SparkFileSinkProcessor(), genSparkWork));
+        FileSinkOperator.getOperatorName() + "%"), fileSinkProc);
 
     opRules.put(new RuleRegExp("Handle Analyze Command",
-        TableScanOperator.getOperatorName() + "%"),
+        TableScanOperator.getOperatorName() + "%"),
         new SparkProcessAnalyzeTable(GenSparkUtils.getUtils()));
 
     opRules.put(new RuleRegExp("Remember union", UnionOperator.getOperatorName() + "%"),
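generateTaskTree above now builds the work tree twice: once from the live roots, where the joins have already become map joins, and once from the cloned roots saved in ParseContext, which still hold plain reduce-side joins (and skip the duplicate file-sink processing). The clone's task is removed from rootTasks and wired in as the main task's backup, sharing the main task's children, so a runtime failure of the map-join plan can be retried with the safer common-join plan. Reusing the PlanTask stand-in from the earlier sketch, the contract this wiring sets up looks roughly like the following; this is a hypothetical runner, not Hive's actual Driver code:

    // Why backupTask.setChildTasks(mainTask.getChildTasks()) above: whichever
    // plan succeeds must feed the same downstream tasks.
    final class FallbackSketch {
      interface TaskRunner { boolean run(PlanTask t); }  // true on success

      static void executeWithFallback(PlanTask main, TaskRunner runner) {
        if (!runner.run(main) && main.backup != null) {
          runner.run(main.backup);  // retry with the reduce-side join plan
        }
        // the children (shared by main and backup) run next either way
      }
    }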
@@ -185,28 +205,27 @@ public Object process(Node n, Stack<Node> s,
      * SMBJoinOP
      *
      * Some of the other processors are expecting only one traversal beyond SMBJoinOp.
-     * We need to traverse from the big-table path only, and stop traversing on the small-table path once we reach SMBJoinOp.
+     * We need to traverse from the big-table path only, and stop traversing on the
+     * small-table path once we reach SMBJoinOp.
      */
     opRules.put(new TypeRule(SMBMapJoinOperator.class),
-      new NodeProcessor() {
-      @Override
-      public Object process(Node currNode, Stack<Node> stack,
-          NodeProcessorCtx procCtx, Object... os) throws SemanticException {
-        for (Node stackNode : stack) {
-          if (stackNode instanceof DummyStoreOperator) {
-            return true;
+        new NodeProcessor() {
+          @Override
+          public Object process(Node currNode, Stack<Node> stack,
+              NodeProcessorCtx procCtx, Object... os) throws SemanticException {
+            for (Node stackNode : stack) {
+              if (stackNode instanceof DummyStoreOperator) {
+                return true;
+              }
+            }
+            return false;
           }
-        }
-        return false;
-      }
-    }
     );
 
     // The dispatcher fires the processor corresponding to the closest matching
     // rule and passes the context along
     Dispatcher disp = new DefaultRuleDispatcher(null, opRules, procCtx);
-    List<Node> topNodes = new ArrayList<Node>();
-    topNodes.addAll(pCtx.getTopOps().values());
     GraphWalker ogw = new GenSparkWorkWalker(disp, procCtx);
     ogw.startWalking(topNodes, null);
 
@@ -215,25 +234,23 @@ public Object process(Node currNode, Stack<Node> stack,
     // SMB Join optimizations to add the "localWork" and bucketing data structures to MapWork.
     opRules.clear();
     opRules.put(new TypeRule(SMBMapJoinOperator.class),
-      SparkSortMergeJoinFactory.getTableScanMapJoin());
+        SparkSortMergeJoinFactory.getTableScanMapJoin());
 
     disp = new DefaultRuleDispatcher(null, opRules, procCtx);
-    topNodes = new ArrayList<Node>();
-    topNodes.addAll(pCtx.getTopOps().values());
     ogw = new GenSparkWorkWalker(disp, procCtx);
     ogw.startWalking(topNodes, null);
 
     // we need to clone some operator plans and remove union operators still
     for (BaseWork w: procCtx.workWithUnionOperators) {
-      GenSparkUtils.getUtils().removeUnionOperators(conf, procCtx, w);
+      utils.removeUnionOperators(conf, procCtx, w);
     }
 
     // finally make sure the file sink operators are set up right
     for (FileSinkOperator fileSink: procCtx.fileSinkSet) {
-      GenSparkUtils.getUtils().processFileSink(procCtx, fileSink);
+      utils.processFileSink(procCtx, fileSink);
     }
 
-    PERF_LOGGER.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_GENERATE_TASK_TREE);
+    return procCtx.currentTask;
   }
 
   @Override
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
index 0e85990..49ae06c 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
@@ -94,7 +94,7 @@
 
   private boolean isMetadataOnly = false;
 
-  private transient final Table tableMetadata;
+  private transient Table tableMetadata;
 
   public TableScanDesc() {
 
@@ -268,4 +268,8 @@ public boolean getIsMetadataOnly() {
   public Table getTableMetadata() {
     return tableMetadata;
   }
+
+  public void setTableMetadata(Table tableMetadata) {
+    this.tableMetadata = tableMetadata;
+  }
 }
diff --git ql/src/test/results/clientpositive/auto_join30.q.out ql/src/test/results/clientpositive/auto_join30.q.out index cb3f777..4f635cf 100644 --- ql/src/test/results/clientpositive/auto_join30.q.out +++ ql/src/test/results/clientpositive/auto_join30.q.out @@ -20,9 +20,9 @@ STAGE DEPENDENCIES: Stage-8 has a backup stage: Stage-2 Stage-5 depends on stages: Stage-8 Stage-3 depends on stages: Stage-2, Stage-5, Stage-6 + Stage-2 Stage-9 has a backup stage: Stage-2 Stage-6 depends on stages: Stage-9 - Stage-2 Stage-4 is a root stage Stage-0 depends on stages: Stage-3 @@ -123,6 +123,43 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Map Reduce +
Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: @@ -161,43 +198,6 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-4 Map Reduce Map Operator Tree: @@ -699,11 +699,11 @@ STAGE DEPENDENCIES: Stage-10 has a backup stage: Stage-2 Stage-6 depends on stages: Stage-10 Stage-3 depends on stages: Stage-2, Stage-6, Stage-7, Stage-8 + Stage-2 Stage-11 has a backup stage: Stage-2 Stage-7 depends on stages: Stage-11 Stage-12 has a backup stage: Stage-2 Stage-8 depends on stages: Stage-12 - Stage-2 Stage-4 is a root stage Stage-5 is a root stage Stage-0 depends on stages: Stage-3 @@ -818,6 +818,51 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + 
key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-11 Map Reduce Local Work Alias -> Map Local Tables: @@ -920,51 +965,6 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-4 Map Reduce Map Operator Tree: @@ -1084,9 +1084,9 @@ STAGE DEPENDENCIES: Stage-9 has a backup stage: Stage-2 Stage-6 depends on stages: Stage-9 Stage-3 depends on stages: Stage-2, Stage-6, Stage-7 + Stage-2 Stage-10 has a backup stage: Stage-2 Stage-7 depends on stages: Stage-10 - Stage-2 Stage-4 is a root stage Stage-5 is a root stage Stage-0 depends on stages: Stage-3 @@ -1198,6 +1198,51 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort 
order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Left Outer Join0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-10 Map Reduce Local Work Alias -> Map Local Tables: @@ -1249,51 +1294,6 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Left Outer Join0 to 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-4 Map Reduce Map Operator Tree: diff --git ql/src/test/results/clientpositive/auto_join31.q.out ql/src/test/results/clientpositive/auto_join31.q.out index 46b47dd..6c267cc 100644 --- ql/src/test/results/clientpositive/auto_join31.q.out +++ ql/src/test/results/clientpositive/auto_join31.q.out @@ -26,9 +26,9 @@ STAGE DEPENDENCIES: Stage-9 has a backup stage: Stage-2 Stage-6 depends on stages: Stage-9 Stage-3 depends on stages: Stage-2, Stage-6, Stage-7 + Stage-2 Stage-10 has a 
backup stage: Stage-2 Stage-7 depends on stages: Stage-10 - Stage-2 Stage-4 is a root stage Stage-5 is a root stage Stage-0 depends on stages: Stage-3 @@ -140,6 +140,51 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-10 Map Reduce Local Work Alias -> Map Local Tables: @@ -191,51 +236,6 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - Inner Join 0 to 2 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - 2 _col0 (type: string) - outputColumnNames: _col2, _col3 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(hash(_col2,_col3)) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-4 Map Reduce Map Operator 
Tree: diff --git ql/src/test/results/clientpositive/auto_join_stats.q.out ql/src/test/results/clientpositive/auto_join_stats.q.out index 9100762..a5a24c2 100644 --- ql/src/test/results/clientpositive/auto_join_stats.q.out +++ ql/src/test/results/clientpositive/auto_join_stats.q.out @@ -38,9 +38,9 @@ STAGE DEPENDENCIES: Stage-6 depends on stages: Stage-10 Stage-9 depends on stages: Stage-1, Stage-6, Stage-7 Stage-5 depends on stages: Stage-9 + Stage-1 Stage-11 has a backup stage: Stage-1 Stage-7 depends on stages: Stage-11 - Stage-1 Stage-0 depends on stages: Stage-5 STAGE PLANS: @@ -134,48 +134,6 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-11 - Map Reduce Local Work - Alias -> Map Local Tables: - src1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - src1 - TableScan - alias: src1 - Filter Operator - predicate: key is not null (type: boolean) - HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) - - Stage: Stage-7 - Map Reduce - Map Operator Tree: - TableScan - alias: src2 - Filter Operator - predicate: key is not null (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col5 - Filter Operator - predicate: (_col0 + _col5) is not null (type: boolean) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Local Work: - Map Reduce Local Work - Stage: Stage-1 Map Reduce Map Operator Tree: @@ -220,6 +178,48 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-11 + Map Reduce Local Work + Alias -> Map Local Tables: + src1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + src1 + TableScan + alias: src1 + Filter Operator + predicate: key is not null (type: boolean) + HashTable Sink Operator + keys: + 0 key (type: string) + 1 key (type: string) + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + alias: src2 + Filter Operator + predicate: key is not null (type: boolean) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col5 + Filter Operator + predicate: (_col0 + _col5) is not null (type: boolean) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + Stage: Stage-0 Fetch Operator limit: -1 @@ -272,9 +272,9 @@ STAGE DEPENDENCIES: Stage-9 depends on stages: Stage-13 Stage-12 depends on stages: Stage-1, Stage-9, Stage-10 Stage-7 depends on stages: Stage-12 + Stage-1 Stage-14 has a backup stage: Stage-1 Stage-10 depends on stages: Stage-14 - Stage-1 Stage-0 depends on stages: Stage-7 STAGE PLANS: @@ -393,48 +393,6 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-14 - Map Reduce Local Work - Alias -> Map Local Tables: - src1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - src1 - TableScan - alias: src1 - Filter Operator - predicate: key is not null (type: boolean) - HashTable Sink Operator - keys: - 0 key (type: string) - 1 key 
(type: string) - - Stage: Stage-10 - Map Reduce - Map Operator Tree: - TableScan - alias: src2 - Filter Operator - predicate: key is not null (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col5 - Filter Operator - predicate: (_col0 + _col5) is not null (type: boolean) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Local Work: - Map Reduce Local Work - Stage: Stage-1 Map Reduce Map Operator Tree: @@ -479,6 +437,48 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-14 + Map Reduce Local Work + Alias -> Map Local Tables: + src1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + src1 + TableScan + alias: src1 + Filter Operator + predicate: key is not null (type: boolean) + HashTable Sink Operator + keys: + 0 key (type: string) + 1 key (type: string) + + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + alias: src2 + Filter Operator + predicate: key is not null (type: boolean) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col5 + Filter Operator + predicate: (_col0 + _col5) is not null (type: boolean) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/auto_join_without_localtask.q.out ql/src/test/results/clientpositive/auto_join_without_localtask.q.out index fc44acf..bf18360 100644 --- ql/src/test/results/clientpositive/auto_join_without_localtask.q.out +++ ql/src/test/results/clientpositive/auto_join_without_localtask.q.out @@ -9,9 +9,9 @@ STAGE DEPENDENCIES: Stage-7 has a backup stage: Stage-1 Stage-4 depends on stages: Stage-7 Stage-2 depends on stages: Stage-1, Stage-4, Stage-5 + Stage-1 Stage-8 has a backup stage: Stage-1 Stage-5 depends on stages: Stage-8 - Stage-1 Stage-0 depends on stages: Stage-2 STAGE PLANS: @@ -88,52 +88,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_0:a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_0:a - TableScan - alias: a - Filter Operator - predicate: key is not null (type: boolean) - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Filter Operator - predicate: key is not null (type: boolean) - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - 
table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Local Work: - Map Reduce Local Work - Stage: Stage-1 Map Reduce Map Operator Tree: @@ -184,6 +138,52 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:a + TableScan + alias: a + Filter Operator + predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + Stage: Stage-0 Fetch Operator limit: 40 @@ -256,11 +256,11 @@ STAGE DEPENDENCIES: Stage-12 has a backup stage: Stage-2 Stage-6 depends on stages: Stage-12 Stage-3 depends on stages: Stage-2, Stage-6, Stage-7 - Stage-7 has a backup stage: Stage-2 Stage-2 + Stage-7 has a backup stage: Stage-2 + Stage-1 Stage-15 has a backup stage: Stage-1 Stage-10 depends on stages: Stage-15 - Stage-1 Stage-0 depends on stages: Stage-3 STAGE PLANS: @@ -380,39 +380,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-7 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Filter Operator - predicate: key is not null (type: boolean) - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Local Work: - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_0:$INTNAME - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_0:$INTNAME - TableScan - Stage: Stage-2 Map Reduce Map Operator Tree: @@ -454,41 +421,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-15 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:a - TableScan - alias: a - Filter Operator - predicate: (value is not null and key is not null) (type: boolean) - Select Operator - expressions: key (type: string), value (type: string) 
- outputColumnNames: _col0, _col1 - HashTable Sink Operator - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - - Stage: Stage-10 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan alias: a Filter Operator - predicate: value is not null (type: boolean) + predicate: key is not null (type: boolean) Select Operator - expressions: value (type: string) + expressions: key (type: string) outputColumnNames: _col0 Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) + 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1 File Output Operator @@ -499,6 +446,13 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:$INTNAME + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:$INTNAME + TableScan Stage: Stage-1 Map Reduce @@ -550,6 +504,52 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-15 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:$hdt$_0:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:$hdt$_0:a + TableScan + alias: a + Filter Operator + predicate: (value is not null and key is not null) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + HashTable Sink Operator + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: value is not null (type: boolean) + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + Stage: Stage-0 Fetch Operator limit: 40 @@ -624,12 +624,12 @@ STAGE DEPENDENCIES: Stage-7 depends on stages: Stage-3, Stage-8, Stage-9 , consists of Stage-5, Stage-12, Stage-1 Stage-5 has a backup stage: Stage-1 Stage-2 depends on stages: Stage-1, Stage-5, Stage-6 + Stage-1 Stage-12 has a backup stage: Stage-1 Stage-6 depends on stages: Stage-12 - Stage-1 + Stage-3 Stage-14 has a backup stage: Stage-3 Stage-9 depends on stages: Stage-14 - Stage-3 Stage-0 depends on stages: Stage-2 STAGE PLANS: @@ -745,49 +745,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-12 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:a - TableScan - alias: a - Filter Operator - predicate: value is not null (type: boolean) - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - - Stage: Stage-6 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col1, _col2 - Select Operator - 
expressions: _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Local Work: - Map Reduce Local Work - Stage: Stage-1 Map Reduce Map Operator Tree: @@ -833,49 +790,46 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-14 + Stage: Stage-12 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_1:$hdt$_1:a + $hdt$_0:$hdt$_0:a Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_1:$hdt$_1:a + $hdt$_0:$hdt$_0:a TableScan alias: a Filter Operator - predicate: ((UDFToDouble(key) > 100.0) and value is not null) (type: boolean) + predicate: value is not null (type: boolean) Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 + expressions: value (type: string) + outputColumnNames: _col0 HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col0 (type: string) + 1 _col1 (type: string) - Stage: Stage-9 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan - alias: a - Filter Operator - predicate: (UDFToDouble(key) > 100.0) (type: boolean) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col1, _col2 Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + expressions: _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -929,6 +883,52 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-14 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:$hdt$_1:$hdt$_1:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:$hdt$_1:$hdt$_1:a + TableScan + alias: a + Filter Operator + predicate: ((UDFToDouble(key) > 100.0) and value is not null) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: (UDFToDouble(key) > 100.0) (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + 
table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + Stage: Stage-0 Fetch Operator limit: 40 diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_1.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_1.q.out index 926c99a..72060ce 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_1.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_1.q.out @@ -591,10 +591,10 @@ STAGE DEPENDENCIES: Stage-5 is a root stage , consists of Stage-6, Stage-7, Stage-1 Stage-6 has a backup stage: Stage-1 Stage-3 depends on stages: Stage-6 + Stage-1 Stage-7 has a backup stage: Stage-1 Stage-4 depends on stages: Stage-7 - Stage-1 - Stage-0 depends on stages: Stage-3, Stage-4, Stage-1 + Stage-0 depends on stages: Stage-3, Stage-1, Stage-4 STAGE PLANS: Stage: Stage-5 @@ -873,6 +873,165 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [a] + /bucket_big/ds=2008-04-09 [a] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: @@ -1193,165 +1352,6 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - Position of Big Table: 0 - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: ds=2008-04-08 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - properties: - COLUMN_STATS_ACCURATE true - bucket_count 4 - bucket_field_name key - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_big - numFiles 4 - numRows 0 - partition_columns ds - partition_columns.types string - rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 - bucket_field_name key - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_big - partition_columns ds - partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big -#### A masked pattern was here #### - Partition - base file name: ds=2008-04-09 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-09 - properties: - COLUMN_STATS_ACCURATE true - bucket_count 4 - bucket_field_name key - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_big - numFiles 4 - numRows 0 - partition_columns ds - partition_columns.types string - rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 - bucket_field_name key - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_big - partition_columns ds - partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [a] - /bucket_big/ds=2008-04-09 [a] - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types bigint - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_2.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_2.q.out index 597e423..cdacd6e 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_2.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_2.q.out @@ -350,10 +350,10 @@ STAGE DEPENDENCIES: Stage-5 is a root stage , consists of Stage-6, Stage-7, Stage-1 Stage-6 has a backup stage: Stage-1 Stage-3 depends on stages: Stage-6 + Stage-1 Stage-7 has a backup stage: Stage-1 Stage-4 depends on stages: Stage-7 - Stage-1 - Stage-0 depends on stages: Stage-3, Stage-4, Stage-1 + Stage-0 depends on stages: Stage-3, Stage-1, Stage-4 STAGE PLANS: Stage: Stage-5 @@ -632,6 +632,165 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 54 Data size: 5500 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: 
default.bucket_big +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [a] + /bucket_big/ds=2008-04-09 [a] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: @@ -952,165 +1111,6 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 54 Data size: 5500 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - Position of Big Table: 0 - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: ds=2008-04-08 - input 
format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - properties: - COLUMN_STATS_ACCURATE true - bucket_count 2 - bucket_field_name key - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_big - numFiles 2 - numRows 0 - partition_columns ds - partition_columns.types string - rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_big - partition_columns ds - partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big -#### A masked pattern was here #### - Partition - base file name: ds=2008-04-09 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-09 - properties: - COLUMN_STATS_ACCURATE true - bucket_count 2 - bucket_field_name key - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_big - numFiles 2 - numRows 0 - partition_columns ds - partition_columns.types string - rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_big - partition_columns ds - partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [a] - /bucket_big/ds=2008-04-09 [a] - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 -#### A masked pattern was here #### - table: - input format: 
org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types bigint - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_3.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_3.q.out index 4b392c7..fa16fa3 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_3.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_3.q.out @@ -473,10 +473,10 @@ STAGE DEPENDENCIES: Stage-5 is a root stage , consists of Stage-6, Stage-7, Stage-1 Stage-6 has a backup stage: Stage-1 Stage-3 depends on stages: Stage-6 + Stage-1 Stage-7 has a backup stage: Stage-1 Stage-4 depends on stages: Stage-7 - Stage-1 - Stage-0 depends on stages: Stage-3, Stage-4, Stage-1 + Stage-0 depends on stages: Stage-3, Stage-1, Stage-4 STAGE PLANS: Stage: Stage-5 @@ -799,6 +799,116 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [a] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: @@ -1072,116 +1182,6 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - Position of Big Table: 0 - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: ds=2008-04-08 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - properties: - COLUMN_STATS_ACCURATE true - bucket_count 4 - bucket_field_name key - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_big - numFiles 4 - numRows 0 - partition_columns ds - partition_columns.types string - rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 - bucket_field_name key - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_big - partition_columns ds - partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big - Truncated Path -> Alias: - 
/bucket_big/ds=2008-04-08 [a] - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types bigint - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_4.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_4.q.out index 9e33128..c97cf49 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_4.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_4.q.out @@ -489,10 +489,10 @@ STAGE DEPENDENCIES: Stage-5 is a root stage , consists of Stage-6, Stage-7, Stage-1 Stage-6 has a backup stage: Stage-1 Stage-3 depends on stages: Stage-6 + Stage-1 Stage-7 has a backup stage: Stage-1 Stage-4 depends on stages: Stage-7 - Stage-1 - Stage-0 depends on stages: Stage-3, Stage-4, Stage-1 + Stage-0 depends on stages: Stage-3, Stage-1, Stage-4 STAGE PLANS: Stage: Stage-5 @@ -815,6 +815,116 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + 
bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [a] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: @@ -1088,116 +1198,6 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - Position of Big Table: 0 - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: ds=2008-04-08 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - properties: - COLUMN_STATS_ACCURATE true - bucket_count 2 - bucket_field_name key - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_big - numFiles 2 - numRows 0 - partition_columns ds - partition_columns.types string - rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name 
default.bucket_big - partition_columns ds - partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [a] - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types bigint - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_5.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_5.q.out index 694e72b..52fc70b 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_5.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_5.q.out @@ -434,10 +434,10 @@ STAGE DEPENDENCIES: Stage-5 is a root stage , consists of Stage-6, Stage-7, Stage-1 Stage-6 has a backup stage: Stage-1 Stage-3 depends on stages: Stage-6 + Stage-1 Stage-7 has a backup stage: Stage-1 Stage-4 depends on stages: Stage-7 - Stage-1 - Stage-0 depends on stages: Stage-3, Stage-4, Stage-1 + Stage-0 depends on stages: Stage-3, Stage-1, Stage-4 STAGE PLANS: Stage: Stage-5 @@ -607,42 +607,24 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false - Stage: Stage-7 - Map Reduce Local Work - Alias -> Map Local Tables: - a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - a - TableScan - alias: a - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - HashTable Sink Operator - keys: - 0 key (type: string) - 1 key (type: string) - Position of Big Table: 1 - - Stage: Stage-4 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: b + alias: a + Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Map Join Operator + Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: 0 key (type: string) 1 key (type: string) - Position of Big Table: 1 + Position of Big Table: 0 Group By Operator aggregations: count() mode: hash @@ -652,8 +634,6 @@ STAGE PLANS: tag: -1 value expressions: _col0 (type: bigint) auto parallelism: false - Local Work: - Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -701,46 +681,8 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big -#### A masked pattern was here #### - Partition - input format: 
org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count 4 - bucket_field_name key - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_small - serialization.ddl struct bucket_small { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 - bucket_field_name key - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_small - numFiles 4 - serialization.ddl struct bucket_small { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 226 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_small - name: default.bucket_small Truncated Path -> Alias: - /bucket_small [b] + /bucket_big [a] Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -771,24 +713,42 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false - Stage: Stage-1 + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + a + TableScan + alias: a + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + HashTable Sink Operator + keys: + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 1 + + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + alias: b GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 14 Data size: 1425 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator + Map Join Operator condition map: Inner Join 0 to 1 keys: 0 key (type: string) 1 key (type: string) - Position of Big Table: 0 + Position of Big Table: 1 Group By Operator aggregations: count() mode: hash @@ -798,6 +758,8 @@ STAGE PLANS: tag: -1 value expressions: _col0 (type: bigint) auto parallelism: false + Local Work: + Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -845,8 +807,46 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket_big name: default.bucket_big +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + 
COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 4 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 226 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small Truncated Path -> Alias: - /bucket_big [a] + /bucket_small [b] Needs Tagging: false Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_6.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_6.q.out index c16aae0..730bc92 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_6.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_6.q.out @@ -407,11 +407,11 @@ STAGE DEPENDENCIES: Stage-9 has a backup stage: Stage-1 Stage-5 depends on stages: Stage-9 Stage-2 depends on stages: Stage-1, Stage-5, Stage-6, Stage-7 + Stage-1 Stage-10 has a backup stage: Stage-1 Stage-6 depends on stages: Stage-10 Stage-11 has a backup stage: Stage-1 Stage-7 depends on stages: Stage-11 - Stage-1 Stage-0 depends on stages: Stage-2 STAGE PLANS: @@ -503,6 +503,64 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: c + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 UDFToDouble(key) (type: double) + 1 UDFToDouble(key) (type: double) + 2 UDFToDouble(key) (type: double) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data 
size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-10 Map Reduce Local Work Alias -> Map Local Tables: @@ -621,64 +679,6 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(key) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(key) (type: double) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(key) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(key) (type: double) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: c - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(key) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(key) (type: double) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 UDFToDouble(key) (type: double) - 1 UDFToDouble(key) (type: double) - 2 UDFToDouble(key) (type: double) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-0 Fetch Operator limit: -1 @@ -982,11 +982,11 @@ STAGE DEPENDENCIES: Stage-9 has a backup stage: Stage-1 Stage-5 depends on stages: Stage-9 Stage-2 depends on stages: Stage-1, Stage-5, Stage-6, Stage-7 + Stage-1 Stage-10 has a backup stage: Stage-1 Stage-6 depends on stages: Stage-10 Stage-11 has a backup stage: Stage-1 Stage-7 depends on stages: Stage-11 - Stage-1 Stage-0 depends on stages: Stage-2 STAGE PLANS: @@ -1078,6 +1078,64 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is 
not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: c + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 UDFToDouble(key) (type: double) + 1 UDFToDouble(key) (type: double) + 2 UDFToDouble(key) (type: double) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-10 Map Reduce Local Work Alias -> Map Local Tables: @@ -1196,64 +1254,6 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(key) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(key) (type: double) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(key) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(key) (type: double) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: c - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(key) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(key) (type: double) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 UDFToDouble(key) (type: double) - 1 UDFToDouble(key) (type: double) - 2 UDFToDouble(key) (type: double) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-0 Fetch Operator limit: -1 @@ -1283,12 +1283,12 @@ STAGE DEPENDENCIES: Stage-7 is a root stage , consists of Stage-8, Stage-9, Stage-10, Stage-1 Stage-8 has a backup stage: Stage-1 Stage-4 depends on stages: Stage-8 + Stage-1 Stage-9 has a backup stage: Stage-1 Stage-5 depends on stages: Stage-9 Stage-10 has a backup stage: Stage-1 Stage-6 depends on stages: Stage-10 - Stage-1 - Stage-0 depends on stages: Stage-4, Stage-5, Stage-6, Stage-1 + Stage-0 depends on stages: Stage-4, Stage-1, Stage-5, Stage-6 STAGE PLANS: Stage: Stage-7 @@ -1364,6 +1364,45 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: @@ -1504,45 +1543,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - Group By Operator - aggregations: count() - mode: hash - 
outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_7.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_7.q.out index 91ad73e..1de8332 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_7.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_7.q.out @@ -608,10 +608,10 @@ STAGE DEPENDENCIES: Stage-5 is a root stage , consists of Stage-6, Stage-7, Stage-1 Stage-6 has a backup stage: Stage-1 Stage-3 depends on stages: Stage-6 + Stage-1 Stage-7 has a backup stage: Stage-1 Stage-4 depends on stages: Stage-7 - Stage-1 - Stage-0 depends on stages: Stage-3, Stage-4, Stage-1 + Stage-0 depends on stages: Stage-3, Stage-1, Stage-4 STAGE PLANS: Stage: Stage-5 @@ -983,6 +983,165 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 54 Data size: 5500 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [a] + /bucket_big/ds=2008-04-09 [a] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: @@ -1351,165 +1510,6 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 54 Data size: 5500 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 27 Data size: 2750 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - Position of Big Table: 0 - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - value expressions: _col0 (type: bigint) - auto 
parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: ds=2008-04-08 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - properties: - COLUMN_STATS_ACCURATE true - bucket_count 2 - bucket_field_name key - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_big - numFiles 2 - numRows 0 - partition_columns ds - partition_columns.types string - rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_big - partition_columns ds - partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big -#### A masked pattern was here #### - Partition - base file name: ds=2008-04-09 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-09 - properties: - COLUMN_STATS_ACCURATE true - bucket_count 2 - bucket_field_name key - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_big - numFiles 2 - numRows 0 - partition_columns ds - partition_columns.types string - rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 2750 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_big - partition_columns ds - partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [a] - /bucket_big/ds=2008-04-09 [a] - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - File Output Operator 
- compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types bigint - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_8.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_8.q.out index 85b1458..26122e1 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_8.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_8.q.out @@ -610,10 +610,10 @@ STAGE DEPENDENCIES: Stage-5 is a root stage , consists of Stage-6, Stage-7, Stage-1 Stage-6 has a backup stage: Stage-1 Stage-3 depends on stages: Stage-6 + Stage-1 Stage-7 has a backup stage: Stage-1 Stage-4 depends on stages: Stage-7 - Stage-1 - Stage-0 depends on stages: Stage-3, Stage-4, Stage-1 + Stage-0 depends on stages: Stage-3, Stage-1, Stage-4 STAGE PLANS: Stage: Stage-5 @@ -985,6 +985,165 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: string) + 1 key (type: string) + Position of Big Table: 0 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + 
serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + base file name: ds=2008-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [a] + /bucket_big/ds=2008-04-09 [a] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: @@ -1353,165 +1512,6 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: string) - 1 key (type: string) - Position of Big Table: 0 - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - tag: -1 - 
value expressions: _col0 (type: bigint) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: ds=2008-04-08 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - properties: - COLUMN_STATS_ACCURATE true - bucket_count 4 - bucket_field_name key - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_big - numFiles 4 - numRows 0 - partition_columns ds - partition_columns.types string - rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 - bucket_field_name key - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_big - partition_columns ds - partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big -#### A masked pattern was here #### - Partition - base file name: ds=2008-04-09 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-09 - properties: - COLUMN_STATS_ACCURATE true - bucket_count 4 - bucket_field_name key - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_big - numFiles 4 - numRows 0 - partition_columns ds - partition_columns.types string - rawDataSize 0 - serialization.ddl struct bucket_big { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - SORTBUCKETCOLSPREFIX TRUE - bucket_count 4 - bucket_field_name key - columns key,value - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.bucket_big - partition_columns ds - partition_columns.types string - serialization.ddl struct bucket_big { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_big - name: default.bucket_big - Truncated Path -> Alias: - /bucket_big/ds=2008-04-08 [a] - /bucket_big/ds=2008-04-09 [a] - Needs Tagging: false - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: bigint) - 
outputColumnNames: _col0 - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - columns _col0 - columns.types bigint - escape.delim \ - hive.serialization.extend.nesting.levels true - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/auto_sortmerge_join_9.q.out ql/src/test/results/clientpositive/auto_sortmerge_join_9.q.out index 9837c78..a5aa8b0 100644 --- ql/src/test/results/clientpositive/auto_sortmerge_join_9.q.out +++ ql/src/test/results/clientpositive/auto_sortmerge_join_9.q.out @@ -378,11 +378,11 @@ STAGE DEPENDENCIES: Stage-8 depends on stages: Stage-1, Stage-4 , consists of Stage-9, Stage-10, Stage-2 Stage-9 has a backup stage: Stage-2 Stage-6 depends on stages: Stage-9 + Stage-2 Stage-10 has a backup stage: Stage-2 Stage-7 depends on stages: Stage-10 - Stage-2 Stage-4 is a root stage - Stage-0 depends on stages: Stage-6, Stage-7, Stage-2 + Stage-0 depends on stages: Stage-6, Stage-2, Stage-7 STAGE PLANS: Stage: Stage-1 @@ -464,6 +464,39 @@ STAGE PLANS: Local Work: Map Reduce Local Work + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-10 Map Reduce Local Work Alias -> Map Local Tables: @@ -501,39 +534,6 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) - TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-4 Map Reduce Map Operator Tree: @@ -1562,10 +1562,10 @@ STAGE DEPENDENCIES: Stage-5 is a root stage , consists of Stage-6, Stage-7, Stage-1 Stage-6 has a backup stage: Stage-1 Stage-3 depends on stages: Stage-6 + Stage-1 Stage-7 has a backup stage: Stage-1 Stage-4 depends on stages: Stage-7 - Stage-1 - Stage-0 depends on stages: Stage-3, Stage-4, Stage-1 + Stage-0 depends on stages: Stage-3, Stage-1, Stage-4 STAGE PLANS: Stage: Stage-5 @@ -1625,31 +1625,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-7 - Map Reduce Local Work - Alias -> Map Local Tables: - subq1:a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - subq1:a - TableScan - alias: a - Filter Operator - predicate: key is not null (type: boolean) - HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - - Stage: Stage-4 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: b + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Map Join Operator + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -1662,8 +1647,6 @@ STAGE PLANS: Reduce Output Operator sort order: value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1679,16 +1662,31 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-1 + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + subq1:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + subq1:a + TableScan + alias: a + Filter Operator + predicate: key is not null (type: boolean) + HashTable Sink Operator + keys: + 0 key (type: int) + 1 key (type: int) + + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + alias: b Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator + Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -1701,6 +1699,8 @@ STAGE PLANS: Reduce Output Operator sort order: value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1757,10 +1757,10 @@ STAGE DEPENDENCIES: Stage-5 is a root stage , consists of Stage-6, Stage-7, Stage-1 Stage-6 has a backup stage: Stage-1 Stage-3 depends on stages: Stage-6 + Stage-1 Stage-7 has a backup stage: Stage-1 Stage-4 depends on stages: Stage-7 - Stage-1 - Stage-0 depends on stages: Stage-3, Stage-4, Stage-1 + Stage-0 depends on stages: Stage-3, Stage-1, Stage-4 STAGE PLANS: Stage: Stage-5 @@ -1825,31 +1825,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-7 - Map Reduce Local Work - Alias -> Map Local Tables: - subq1:a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - subq1:a - TableScan - alias: a - Filter Operator - predicate: key is not 
null (type: boolean) - HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - - Stage: Stage-4 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: b + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Map Join Operator + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -1866,8 +1851,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) value expressions: _col1 (type: bigint) - Local Work: - Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1884,16 +1867,31 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-1 + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + subq1:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + subq1:a + TableScan + alias: a + Filter Operator + predicate: key is not null (type: boolean) + HashTable Sink Operator + keys: + 0 key (type: int) + 1 key (type: int) + + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + alias: b Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator + Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -1910,6 +1908,8 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) value expressions: _col1 (type: bigint) + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1983,9 +1983,9 @@ STAGE DEPENDENCIES: Stage-7 has a backup stage: Stage-1 Stage-4 depends on stages: Stage-7 Stage-2 depends on stages: Stage-1, Stage-4, Stage-5 + Stage-1 Stage-8 has a backup stage: Stage-1 Stage-5 depends on stages: Stage-8 - Stage-1 Stage-0 depends on stages: Stage-2 STAGE PLANS: @@ -2075,31 +2075,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - subq2:subq1:a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - subq2:subq1:a - TableScan - alias: a - Filter Operator - predicate: key is not null (type: boolean) - HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - - Stage: Stage-5 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: b + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Map Join Operator + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -2116,8 +2101,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) value expressions: _col1 (type: bigint) - Local Work: - Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -2136,16 +2119,31 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe 
- Stage: Stage-1 + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + subq2:subq1:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + subq2:subq1:a + TableScan + alias: a + Filter Operator + predicate: key is not null (type: boolean) + HashTable Sink Operator + keys: + 0 key (type: int) + 1 key (type: int) + + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + alias: b Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator + Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -2162,6 +2160,8 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) value expressions: _col1 (type: bigint) + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -2256,19 +2256,19 @@ STAGE DEPENDENCIES: Stage-8 depends on stages: Stage-1, Stage-4, Stage-9, Stage-10, Stage-12, Stage-13 , consists of Stage-15, Stage-16, Stage-2 Stage-15 has a backup stage: Stage-2 Stage-6 depends on stages: Stage-15 + Stage-2 Stage-16 has a backup stage: Stage-2 Stage-7 depends on stages: Stage-16 - Stage-2 + Stage-1 Stage-18 has a backup stage: Stage-1 Stage-10 depends on stages: Stage-18 - Stage-1 Stage-14 is a root stage , consists of Stage-19, Stage-20, Stage-4 Stage-19 has a backup stage: Stage-4 Stage-12 depends on stages: Stage-19 + Stage-4 Stage-20 has a backup stage: Stage-4 Stage-13 depends on stages: Stage-20 - Stage-4 - Stage-0 depends on stages: Stage-6, Stage-7, Stage-2 + Stage-0 depends on stages: Stage-6, Stage-2, Stage-7 STAGE PLANS: Stage: Stage-11 @@ -2370,46 +2370,9 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-16 - Map Reduce Local Work - Alias -> Map Local Tables: - $INTNAME - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $INTNAME - TableScan - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-7 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - - Stage: Stage-2 - Map Reduce - Map Operator Tree: + Stage: Stage-2 + Map Reduce + Map Operator Tree: TableScan Reduce Output Operator key expressions: _col0 (type: int) @@ -2440,31 +2403,53 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-18 + Stage: Stage-16 Map Reduce Local Work Alias -> Map Local Tables: - src1:subq1:a + $INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - src1:subq1:a + $INTNAME TableScan - alias: a - Filter Operator - predicate: key is not null (type: boolean) - HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) + HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) - Stage: Stage-10 + Stage: 
Stage-7 Map Reduce Map Operator Tree: TableScan - alias: b + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Map Join Operator + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -2481,8 +2466,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) value expressions: _col1 (type: bigint) - Local Work: - Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -2496,16 +2479,31 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-18 + Map Reduce Local Work + Alias -> Map Local Tables: + src1:subq1:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + src1:subq1:a + TableScan + alias: a + Filter Operator + predicate: key is not null (type: boolean) + HashTable Sink Operator + keys: + 0 key (type: int) + 1 key (type: int) + + Stage: Stage-10 Map Reduce Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + alias: b Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator + Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -2522,6 +2520,8 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) value expressions: _col1 (type: bigint) + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -2594,31 +2594,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-20 - Map Reduce Local Work - Alias -> Map Local Tables: - src2:subq2:a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - src2:subq2:a - TableScan - alias: a - Filter Operator - predicate: key is not null (type: boolean) - HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - - Stage: Stage-13 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan - alias: b + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Map Join Operator + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -2635,8 +2620,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) value expressions: _col1 (type: bigint) - Local Work: - Map Reduce Local Work Reduce Operator 
Tree: Group By Operator aggregations: count(VALUE._col0) @@ -2650,16 +2633,31 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-4 + Stage: Stage-20 + Map Reduce Local Work + Alias -> Map Local Tables: + src2:subq2:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + src2:subq2:a + TableScan + alias: a + Filter Operator + predicate: key is not null (type: boolean) + HashTable Sink Operator + keys: + 0 key (type: int) + 1 key (type: int) + + Stage: Stage-13 Map Reduce Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + alias: b Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator + Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -2676,6 +2674,8 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) value expressions: _col1 (type: bigint) + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -2761,10 +2761,10 @@ STAGE DEPENDENCIES: Stage-5 is a root stage , consists of Stage-6, Stage-7, Stage-1 Stage-6 has a backup stage: Stage-1 Stage-3 depends on stages: Stage-6 + Stage-1 Stage-7 has a backup stage: Stage-1 Stage-4 depends on stages: Stage-7 - Stage-1 - Stage-0 depends on stages: Stage-3, Stage-4, Stage-1 + Stage-0 depends on stages: Stage-3, Stage-1, Stage-4 STAGE PLANS: Stage: Stage-5 @@ -2830,37 +2830,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-7 - Map Reduce Local Work - Alias -> Map Local Tables: - subq1:a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - subq1:a - TableScan - alias: a - Filter Operator - predicate: (key < 6) (type: boolean) - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-4 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Map Join Operator + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -2873,8 +2856,6 @@ STAGE PLANS: Reduce Output Operator sort order: value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -2890,20 +2871,37 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-1 + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + subq1:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + subq1:a + TableScan + alias: a + Filter Operator + predicate: (key < 6) (type: boolean) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + + Stage: 
Stage-4 Map Reduce Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator + Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -2916,6 +2914,8 @@ STAGE PLANS: Reduce Output Operator sort order: value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -2988,10 +2988,10 @@ STAGE DEPENDENCIES: Stage-5 is a root stage , consists of Stage-6, Stage-7, Stage-1 Stage-6 has a backup stage: Stage-1 Stage-3 depends on stages: Stage-6 + Stage-1 Stage-7 has a backup stage: Stage-1 Stage-4 depends on stages: Stage-7 - Stage-1 - Stage-0 depends on stages: Stage-3, Stage-4, Stage-1 + Stage-0 depends on stages: Stage-3, Stage-1, Stage-4 STAGE PLANS: Stage: Stage-5 @@ -3024,7 +3024,50 @@ STAGE PLANS: Select Operator expressions: key (type: int) outputColumnNames: _col0 - Map Join Operator + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -3037,8 +3080,6 @@ STAGE PLANS: Reduce Output Operator sort order: value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -3111,47 +3152,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 
0 to 1 - keys: - 0 _col0 (type: int) - 1 key (type: int) - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 @@ -3233,10 +3233,10 @@ STAGE DEPENDENCIES: Stage-5 is a root stage , consists of Stage-6, Stage-7, Stage-1 Stage-6 has a backup stage: Stage-1 Stage-3 depends on stages: Stage-6 + Stage-1 Stage-7 has a backup stage: Stage-1 Stage-4 depends on stages: Stage-7 - Stage-1 - Stage-0 depends on stages: Stage-3, Stage-4, Stage-1 + Stage-0 depends on stages: Stage-3, Stage-1, Stage-4 STAGE PLANS: Stage: Stage-5 @@ -3302,37 +3302,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-7 - Map Reduce Local Work - Alias -> Map Local Tables: - subq2:subq1:a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - subq2:subq1:a - TableScan - alias: a - Filter Operator - predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean) - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-4 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Map Join Operator + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -3345,8 +3328,6 @@ STAGE PLANS: Reduce Output Operator sort order: value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -3362,20 +3343,37 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-1 + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + subq2:subq1:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + subq2:subq1:a + TableScan + alias: a + Filter Operator + predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: 
_col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator + Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -3388,6 +3386,8 @@ STAGE PLANS: Reduce Output Operator sort order: value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -3474,10 +3474,10 @@ STAGE DEPENDENCIES: Stage-5 is a root stage , consists of Stage-6, Stage-7, Stage-1 Stage-6 has a backup stage: Stage-1 Stage-3 depends on stages: Stage-6 + Stage-1 Stage-7 has a backup stage: Stage-1 Stage-4 depends on stages: Stage-7 - Stage-1 - Stage-0 depends on stages: Stage-3, Stage-4, Stage-1 + Stage-0 depends on stages: Stage-3, Stage-1, Stage-4 STAGE PLANS: Stage: Stage-5 @@ -3543,37 +3543,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-7 - Map Reduce Local Work - Alias -> Map Local Tables: - subq1:a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - subq1:a - TableScan - alias: a - Filter Operator - predicate: (key < 8) (type: boolean) - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-4 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 8) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Map Join Operator + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -3586,8 +3569,6 @@ STAGE PLANS: Reduce Output Operator sort order: value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -3603,20 +3584,37 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-1 + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + subq1:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + subq1:a + TableScan + alias: a + Filter Operator + predicate: (key < 8) (type: boolean) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 8) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator + Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -3629,6 +3627,8 @@ STAGE PLANS: Reduce Output Operator sort order: value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -3687,10 +3687,10 @@ STAGE DEPENDENCIES: Stage-5 is a root stage , consists 
of Stage-6, Stage-7, Stage-1 Stage-6 has a backup stage: Stage-1 Stage-3 depends on stages: Stage-6 + Stage-1 Stage-7 has a backup stage: Stage-1 Stage-4 depends on stages: Stage-7 - Stage-1 - Stage-0 depends on stages: Stage-3, Stage-4, Stage-1 + Stage-0 depends on stages: Stage-3, Stage-1, Stage-4 STAGE PLANS: Stage: Stage-5 @@ -3753,48 +3753,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-7 - Map Reduce Local Work - Alias -> Map Local Tables: - subq1:a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - subq1:a + Stage: Stage-1 + Map Reduce + Map Operator Tree: TableScan alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 - HashTable Sink Operator + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 key (type: int) - - Stage: Stage-4 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Filter Operator - predicate: key is not null (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 key (type: int) - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) - Local Work: - Map Reduce Local Work + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -3810,32 +3794,48 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-1 - Map Reduce - Map Operator Tree: + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + subq1:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + subq1:a TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 + HashTable Sink Operator keys: 0 _col0 (type: int) 1 key (type: int) - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: key is not null (type: boolean) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -3892,10 +3892,10 @@ STAGE 
DEPENDENCIES: Stage-5 is a root stage , consists of Stage-6, Stage-7, Stage-1 Stage-6 has a backup stage: Stage-1 Stage-3 depends on stages: Stage-6 + Stage-1 Stage-7 has a backup stage: Stage-1 Stage-4 depends on stages: Stage-7 - Stage-1 - Stage-0 depends on stages: Stage-3, Stage-4, Stage-1 + Stage-0 depends on stages: Stage-3, Stage-1, Stage-4 STAGE PLANS: Stage: Stage-5 @@ -3958,6 +3958,43 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: @@ -4015,43 +4052,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 @@ -4103,12 +4103,12 @@ STAGE DEPENDENCIES: Stage-7 is a root stage , consists of Stage-8, Stage-9, Stage-10, Stage-1 Stage-8 has a backup stage: Stage-1 Stage-4 depends on stages: Stage-8 + Stage-1 Stage-9 has a backup stage: Stage-1 Stage-5 depends on stages: Stage-9 Stage-10 has a backup stage: Stage-1 Stage-6 depends on stages: Stage-10 - Stage-1 - Stage-0 depends on stages: Stage-4, Stage-5, Stage-6, Stage-1 + Stage-0 depends on stages: Stage-4, Stage-1, Stage-5, Stage-6 STAGE PLANS: Stage: Stage-7 @@ -4193,6 +4193,49 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: @@ -4351,49 +4394,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 @@ -4459,10 +4459,10 @@ STAGE DEPENDENCIES: Stage-5 is a root stage , consists of Stage-6, Stage-7, Stage-1 Stage-6 has a backup stage: Stage-1 Stage-3 depends on stages: Stage-6 + Stage-1 Stage-7 has a backup stage: Stage-1 Stage-4 depends on stages: Stage-7 - Stage-1 - Stage-0 depends on stages: Stage-3, Stage-4, Stage-1 + Stage-0 depends on stages: Stage-3, Stage-1, Stage-4 STAGE PLANS: Stage: Stage-5 @@ -4525,6 +4525,47 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter 
Operator + predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: @@ -4582,47 +4623,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 key (type: int) - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/bucketsortoptimize_insert_2.q.out ql/src/test/results/clientpositive/bucketsortoptimize_insert_2.q.out index 599b8b9..d1c0330 100644 --- ql/src/test/results/clientpositive/bucketsortoptimize_insert_2.q.out +++ ql/src/test/results/clientpositive/bucketsortoptimize_insert_2.q.out @@ -228,9 +228,9 @@ STAGE DEPENDENCIES: Stage-4 depends on stages: Stage-7 Stage-0 depends on stages: Stage-1, Stage-4, Stage-5 Stage-2 depends on stages: Stage-0 + Stage-1 Stage-8 has a backup stage: Stage-1 Stage-5 depends on stages: Stage-8 - Stage-1 STAGE PLANS: Stage: Stage-6 @@ -302,31 +302,16 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - a - TableScan - alias: a - Filter Operator - predicate: key is not null (type: boolean) - HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) 
- - Stage: Stage-5 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: b + alias: a + Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Map Join Operator + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -341,8 +326,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) value expressions: _col0 (type: int), _col1 (type: string) - Local Work: - Map Reduce Local Work Reduce Operator Tree: Extract File Output Operator @@ -353,16 +336,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 - Stage: Stage-1 + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + a + TableScan + alias: a + Filter Operator + predicate: key is not null (type: boolean) + HashTable Sink Operator + keys: + 0 key (type: int) + 1 key (type: int) + + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE + alias: b Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator + Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -377,6 +375,8 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) value expressions: _col0 (type: int), _col1 (type: string) + Local Work: + Map Reduce Local Work Reduce Operator Tree: Extract File Output Operator @@ -937,9 +937,9 @@ STAGE DEPENDENCIES: Stage-4 depends on stages: Stage-7 Stage-0 depends on stages: Stage-1, Stage-4, Stage-5 Stage-2 depends on stages: Stage-0 + Stage-1 Stage-8 has a backup stage: Stage-1 Stage-5 depends on stages: Stage-8 - Stage-1 STAGE PLANS: Stage: Stage-6 @@ -1017,37 +1017,20 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - a:test_table1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - a:test_table1 - TableScan - alias: test_table1 - Filter Operator - predicate: key is not null (type: boolean) - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-5 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: test_table2 + alias: test_table1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Map Join Operator + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -1062,8 +1045,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) value expressions: _col0 (type: int), _col1 (type: string) - Local Work: - Map Reduce Local Work Reduce Operator Tree: Extract File Output Operator @@ -1074,20 +1055,37 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 - Stage: Stage-1 + 
Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + a:test_table1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + a:test_table1 + TableScan + alias: test_table1 + Filter Operator + predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan - alias: test_table1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + alias: test_table2 Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator + Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -1102,6 +1100,8 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) value expressions: _col0 (type: int), _col1 (type: string) + Local Work: + Map Reduce Local Work Reduce Operator Tree: Extract File Output Operator diff --git ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out index fd99597..ce59b45 100644 --- ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out +++ ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out @@ -223,9 +223,9 @@ STAGE DEPENDENCIES: Stage-4 depends on stages: Stage-7 Stage-0 depends on stages: Stage-1, Stage-4, Stage-5 Stage-2 depends on stages: Stage-0 + Stage-1 Stage-8 has a backup stage: Stage-1 Stage-5 depends on stages: Stage-8 - Stage-1 STAGE PLANS: Stage: Stage-6 @@ -294,31 +294,16 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - a - TableScan - alias: a - Filter Operator - predicate: key is not null (type: boolean) - HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - - Stage: Stage-5 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: b + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Map Join Operator + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -330,8 +315,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) value expressions: _col0 (type: int), _col1 (type: string) - Local Work: - Map Reduce Local Work Reduce Operator Tree: Extract File Output Operator @@ -342,16 +325,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 - Stage: Stage-1 + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + a + TableScan + alias: a + Filter Operator + predicate: key is not null (type: boolean) + HashTable Sink Operator + keys: + 0 key (type: int) + 1 key (type: int) + + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + alias: b Filter Operator 
predicate: key is not null (type: boolean) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator + Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -363,6 +361,8 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) value expressions: _col0 (type: int), _col1 (type: string) + Local Work: + Map Reduce Local Work Reduce Operator Tree: Extract File Output Operator diff --git ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out index 8130ab9..59a7d45 100644 --- ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out +++ ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out @@ -76,9 +76,9 @@ STAGE DEPENDENCIES: Stage-4 depends on stages: Stage-7 Stage-0 depends on stages: Stage-1, Stage-4, Stage-5 Stage-2 depends on stages: Stage-0 + Stage-1 Stage-8 has a backup stage: Stage-1 Stage-5 depends on stages: Stage-8 - Stage-1 STAGE PLANS: Stage: Stage-6 @@ -150,31 +150,16 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - a - TableScan - alias: a - Filter Operator - predicate: key is not null (type: boolean) - HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - - Stage: Stage-5 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: b + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Map Join Operator + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -189,8 +174,6 @@ STAGE PLANS: sort order: - Map-reduce partition columns: _col0 (type: int) value expressions: _col0 (type: int), _col1 (type: string) - Local Work: - Map Reduce Local Work Reduce Operator Tree: Extract File Output Operator @@ -201,16 +184,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 - Stage: Stage-1 + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + a + TableScan + alias: a + Filter Operator + predicate: key is not null (type: boolean) + HashTable Sink Operator + keys: + 0 key (type: int) + 1 key (type: int) + + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + alias: b Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator + Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -225,6 +223,8 @@ STAGE PLANS: sort order: - Map-reduce partition columns: _col0 (type: int) value expressions: _col0 (type: int), _col1 (type: string) + Local Work: + Map Reduce Local Work Reduce Operator Tree: Extract File Output Operator @@ -325,9 +325,9 @@ STAGE DEPENDENCIES: Stage-4 depends on stages: Stage-7 Stage-0 depends on stages: Stage-1, Stage-4, Stage-5 Stage-2 depends on stages: Stage-0 + Stage-1 Stage-8 has a backup stage: Stage-1 Stage-5 depends on stages: Stage-8 - Stage-1 STAGE PLANS: Stage: Stage-6 @@ -405,37 +405,20 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator - Stage: 
Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - a:test_table1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - a:test_table1 - TableScan - alias: test_table1 - Filter Operator - predicate: key is not null (type: boolean) - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-5 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: test_table2 + alias: test_table1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Map Join Operator + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -450,8 +433,6 @@ STAGE PLANS: sort order: - Map-reduce partition columns: _col0 (type: int) value expressions: _col0 (type: int), _col1 (type: string) - Local Work: - Map Reduce Local Work Reduce Operator Tree: Extract File Output Operator @@ -462,20 +443,37 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 - Stage: Stage-1 + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + a:test_table1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + a:test_table1 + TableScan + alias: test_table1 + Filter Operator + predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan - alias: test_table1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + alias: test_table2 Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator + Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -490,6 +488,8 @@ STAGE PLANS: sort order: - Map-reduce partition columns: _col0 (type: int) value expressions: _col0 (type: int), _col1 (type: string) + Local Work: + Map Reduce Local Work Reduce Operator Tree: Extract File Output Operator diff --git ql/src/test/results/clientpositive/bucketsortoptimize_insert_6.q.out ql/src/test/results/clientpositive/bucketsortoptimize_insert_6.q.out index 627aba0..e20ac0d 100644 --- ql/src/test/results/clientpositive/bucketsortoptimize_insert_6.q.out +++ ql/src/test/results/clientpositive/bucketsortoptimize_insert_6.q.out @@ -348,9 +348,9 @@ STAGE DEPENDENCIES: Stage-4 depends on stages: Stage-7 Stage-0 depends on stages: Stage-1, Stage-4, Stage-5 Stage-2 depends on stages: Stage-0 + Stage-1 Stage-8 has a backup stage: Stage-1 Stage-5 depends on stages: Stage-8 - Stage-1 STAGE PLANS: Stage: Stage-6 @@ -422,31 +422,16 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - a - 
TableScan - alias: a - Filter Operator - predicate: (key is not null and key2 is not null) (type: boolean) - HashTable Sink Operator - keys: - 0 key (type: int), key2 (type: int) - 1 key (type: int), key2 (type: int) - - Stage: Stage-5 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: b + alias: a + Statistics: Num rows: 10 Data size: 91 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Map Join Operator + Statistics: Num rows: 3 Data size: 27 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -461,8 +446,6 @@ STAGE PLANS: sort order: +- Map-reduce partition columns: _col0 (type: int), _col1 (type: int) value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) - Local Work: - Map Reduce Local Work Reduce Operator Tree: Extract File Output Operator @@ -473,16 +456,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 - Stage: Stage-1 + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + a + TableScan + alias: a + Filter Operator + predicate: (key is not null and key2 is not null) (type: boolean) + HashTable Sink Operator + keys: + 0 key (type: int), key2 (type: int) + 1 key (type: int), key2 (type: int) + + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 10 Data size: 91 Basic stats: COMPLETE Column stats: NONE + alias: b Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 3 Data size: 27 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator + Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -497,6 +495,8 @@ STAGE PLANS: sort order: +- Map-reduce partition columns: _col0 (type: int), _col1 (type: int) value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Local Work: + Map Reduce Local Work Reduce Operator Tree: Extract File Output Operator @@ -535,9 +535,9 @@ STAGE DEPENDENCIES: Stage-4 depends on stages: Stage-7 Stage-0 depends on stages: Stage-1, Stage-4, Stage-5 Stage-2 depends on stages: Stage-0 + Stage-1 Stage-8 has a backup stage: Stage-1 Stage-5 depends on stages: Stage-8 - Stage-1 STAGE PLANS: Stage: Stage-6 @@ -609,31 +609,16 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - subq1:a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - subq1:a - TableScan - alias: a - Filter Operator - predicate: (key is not null and key2 is not null) (type: boolean) - HashTable Sink Operator - keys: - 0 key (type: int), key2 (type: int) - 1 key (type: int), key2 (type: int) - - Stage: Stage-5 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: b + alias: a + Statistics: Num rows: 10 Data size: 91 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Map Join Operator + Statistics: Num rows: 3 Data size: 27 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -648,8 +633,6 @@ STAGE PLANS: sort order: +- Map-reduce partition columns: _col0 (type: int), _col1 (type: int) value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) - Local Work: - Map Reduce Local Work Reduce 
Operator Tree: Extract File Output Operator @@ -660,16 +643,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 - Stage: Stage-1 + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + subq1:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + subq1:a + TableScan + alias: a + Filter Operator + predicate: (key is not null and key2 is not null) (type: boolean) + HashTable Sink Operator + keys: + 0 key (type: int), key2 (type: int) + 1 key (type: int), key2 (type: int) + + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 10 Data size: 91 Basic stats: COMPLETE Column stats: NONE + alias: b Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 3 Data size: 27 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator + Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -684,6 +682,8 @@ STAGE PLANS: sort order: +- Map-reduce partition columns: _col0 (type: int), _col1 (type: int) value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Local Work: + Map Reduce Local Work Reduce Operator Tree: Extract File Output Operator @@ -1044,9 +1044,9 @@ STAGE DEPENDENCIES: Stage-4 depends on stages: Stage-7 Stage-0 depends on stages: Stage-1, Stage-4, Stage-5 Stage-2 depends on stages: Stage-0 + Stage-1 Stage-8 has a backup stage: Stage-1 Stage-5 depends on stages: Stage-8 - Stage-1 STAGE PLANS: Stage: Stage-6 @@ -1118,31 +1118,16 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - subq2:subq1:a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - subq2:subq1:a - TableScan - alias: a - Filter Operator - predicate: (key is not null and key2 is not null) (type: boolean) - HashTable Sink Operator - keys: - 0 key (type: int), key2 (type: int) - 1 key (type: int), key2 (type: int) - - Stage: Stage-5 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: b + alias: a + Statistics: Num rows: 10 Data size: 91 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Map Join Operator + Statistics: Num rows: 3 Data size: 27 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -1157,8 +1142,6 @@ STAGE PLANS: sort order: -- Map-reduce partition columns: _col0 (type: int), _col1 (type: int) value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) - Local Work: - Map Reduce Local Work Reduce Operator Tree: Extract File Output Operator @@ -1169,16 +1152,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table4 - Stage: Stage-1 + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + subq2:subq1:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + subq2:subq1:a + TableScan + alias: a + Filter Operator + predicate: (key is not null and key2 is not null) (type: boolean) + HashTable Sink Operator + keys: + 0 key (type: int), key2 (type: int) + 1 key (type: int), key2 (type: int) + + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 10 Data size: 91 Basic stats: COMPLETE Column stats: NONE + alias: b Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 3 Data size: 27 Basic stats: 
COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator + Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -1193,6 +1191,8 @@ STAGE PLANS: sort order: -- Map-reduce partition columns: _col0 (type: int), _col1 (type: int) value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Local Work: + Map Reduce Local Work Reduce Operator Tree: Extract File Output Operator diff --git ql/src/test/results/clientpositive/correlationoptimizer5.q.out ql/src/test/results/clientpositive/correlationoptimizer5.q.out index 6ba3462..fc4095e 100644 --- ql/src/test/results/clientpositive/correlationoptimizer5.q.out +++ ql/src/test/results/clientpositive/correlationoptimizer5.q.out @@ -491,9 +491,9 @@ STAGE DEPENDENCIES: Stage-7 depends on stages: Stage-12 Stage-0 depends on stages: Stage-2, Stage-7, Stage-8 Stage-3 depends on stages: Stage-0 + Stage-2 Stage-13 has a backup stage: Stage-2 Stage-8 depends on stages: Stage-13 - Stage-2 Stage-15 is a root stage Stage-11 depends on stages: Stage-15 @@ -597,44 +597,6 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-13 - Map Reduce Local Work - Alias -> Map Local Tables: - $INTNAME - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $INTNAME - TableScan - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-8 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col3 - Select Operator - expressions: _col0 (type: int), _col3 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_co3 - Local Work: - Map Reduce Local Work - Stage: Stage-2 Map Reduce Map Operator Tree: @@ -673,6 +635,44 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_co3 + Stage: Stage-13 + Map Reduce Local Work + Alias -> Map Local Tables: + $INTNAME + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $INTNAME + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col3 + Select Operator + expressions: _col0 (type: int), _col3 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_co3 + Local Work: + Map Reduce Local Work + Stage: Stage-15 Map Reduce Local Work Alias -> Map Local Tables: diff --git ql/src/test/results/clientpositive/cross_product_check_2.q.out ql/src/test/results/clientpositive/cross_product_check_2.q.out index 102eeb9..73ffc36 100644 --- ql/src/test/results/clientpositive/cross_product_check_2.q.out +++ ql/src/test/results/clientpositive/cross_product_check_2.q.out @@ -440,12 +440,12 @@ STAGE DEPENDENCIES: Stage-8 depends on stages: Stage-1, Stage-4 , consists of Stage-10, Stage-11, Stage-2 Stage-10 has a backup stage: Stage-2 Stage-6 depends 
on stages: Stage-10 + Stage-2 Stage-11 has a backup stage: Stage-2 Stage-7 depends on stages: Stage-11 - Stage-2 Stage-12 is a root stage Stage-4 depends on stages: Stage-12 - Stage-0 depends on stages: Stage-6, Stage-7, Stage-2 + Stage-0 depends on stages: Stage-6, Stage-2, Stage-7 STAGE PLANS: Stage: Stage-1 @@ -521,6 +521,40 @@ STAGE PLANS: Local Work: Map Reduce Local Work + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-11 Map Reduce Local Work Alias -> Map Local Tables: @@ -558,40 +592,6 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 2 Data size: 22 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-12 Map Reduce Local Work Alias -> Map Local Tables: diff --git ql/src/test/results/clientpositive/explain_rearrange.q.out ql/src/test/results/clientpositive/explain_rearrange.q.out index c4a015e..56b9c1c 100644 --- ql/src/test/results/clientpositive/explain_rearrange.q.out +++ ql/src/test/results/clientpositive/explain_rearrange.q.out @@ -64,9 +64,9 @@ STAGE DEPENDENCIES: Stage-10 has a backup stage: Stage-2 [MAPREDLOCAL] Stage-7 depends on stages: Stage-10 [MAPRED] Stage-3 depends on stages: Stage-2, Stage-7, Stage-8 [MAPRED] + Stage-2 [MAPRED] Stage-11 has a backup stage: Stage-2 [MAPREDLOCAL] Stage-8 depends on stages: Stage-11 [MAPRED] - Stage-2 [MAPRED] Stage-5 is a root stage [MAPRED] Stage-0 depends on stages: Stage-3 [FETCH] 
@@ -168,6 +168,39 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-11 Map Reduce Local Work Alias -> Map Local Tables: @@ -205,39 +238,6 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) - TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-5 Map Reduce Map Operator Tree: @@ -325,13 +325,13 @@ order by src1.key, src1.cnt1, src2.cnt1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage [MAPRED] - Stage-2 depends on stages: Stage-1, Stage-9 , consists of Stage-3, Stage-6, Stage-8 [CONDITIONAL] - Stage-3 has a backup stage: Stage-8 [MAPREDLOCAL] + Stage-2 depends on stages: Stage-1, Stage-9 , consists of Stage-3, Stage-7, Stage-6 [CONDITIONAL] + Stage-3 has a backup stage: Stage-6 [MAPREDLOCAL] Stage-4 depends on stages: Stage-3 [MAPRED] - Stage-5 depends on stages: Stage-8, Stage-4, Stage-7 [MAPRED] - Stage-6 has a backup stage: Stage-8 [MAPREDLOCAL] - Stage-7 depends on stages: Stage-6 [MAPRED] - Stage-8 [MAPRED] + Stage-5 depends on stages: Stage-6, Stage-4, Stage-8 [MAPRED] + Stage-6 [MAPRED] + Stage-7 has a backup stage: Stage-6 [MAPREDLOCAL] + Stage-8 depends on stages: Stage-7 [MAPRED] Stage-9 is a root stage [MAPRED] Stage-10 depends on stages: Stage-5 [FETCH] @@ -434,6 +434,39 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) + 
TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME @@ -447,7 +480,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) - Stage: Stage-7 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -470,39 +503,6 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-8 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) - TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-9 Map Reduce Map Operator Tree: @@ -591,13 +591,13 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage [MAPRED] Stage-2 is a root stage [MAPRED] - Stage-3 depends on stages: Stage-1, Stage-2 , consists of Stage-4, Stage-6, Stage-8 [CONDITIONAL] - Stage-4 has a backup stage: Stage-8 [MAPREDLOCAL] + Stage-3 depends on stages: Stage-1, Stage-2 , consists of Stage-4, Stage-7, Stage-6 [CONDITIONAL] + Stage-4 has a backup stage: Stage-6 [MAPREDLOCAL] Stage-5 depends on stages: Stage-4 [MAPRED] - Stage-6 has a backup stage: Stage-8 [MAPREDLOCAL] - Stage-7 depends on stages: Stage-6 [MAPRED] - Stage-8 [MAPRED] - Stage-9 depends on stages: Stage-8, Stage-5, Stage-7 [MAPRED] + Stage-6 [MAPRED] + Stage-7 has a backup stage: Stage-6 [MAPREDLOCAL] + Stage-8 depends on stages: Stage-7 [MAPRED] + Stage-9 depends on stages: Stage-6, Stage-5, Stage-8 [MAPRED] Stage-10 depends on stages: Stage-9 [FETCH] STAGE PLANS: @@ -720,6 +720,39 @@ STAGE PLANS: Map Reduce Local Work Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: 
int) + outputColumnNames: _col0, _col1, _col3 + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME @@ -733,7 +766,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) - Stage: Stage-7 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -756,39 +789,6 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-8 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) - TableScan - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-9 Map Reduce Map Operator Tree: diff --git ql/src/test/results/clientpositive/identity_project_remove_skip.q.out ql/src/test/results/clientpositive/identity_project_remove_skip.q.out index e7bd0ed..77cd639 100644 --- ql/src/test/results/clientpositive/identity_project_remove_skip.q.out +++ ql/src/test/results/clientpositive/identity_project_remove_skip.q.out @@ -21,11 +21,11 @@ STAGE DEPENDENCIES: Stage-6 depends on stages: Stage-1, Stage-3 , consists of Stage-7, Stage-8, Stage-2 Stage-7 has a backup stage: Stage-2 Stage-4 depends on stages: Stage-7 + Stage-2 Stage-8 has a backup stage: Stage-2 Stage-5 depends on stages: Stage-8 - Stage-2 Stage-3 is a root stage - Stage-0 depends on stages: Stage-4, Stage-5, Stage-2 + Stage-0 depends on stages: Stage-4, Stage-2, Stage-5 STAGE PLANS: Stage: Stage-1 @@ -92,6 +92,41 @@ STAGE PLANS: Local Work: Map Reduce Local Work + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '105' (type: string) + sort order: + + Map-reduce partition columns: '105' (type: string) + Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: '105' (type: string) + sort order: + + Map-reduce partition columns: '105' (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '105' (type: string), 'val_105' (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator 
+ compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: @@ -128,41 +163,6 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: '105' (type: string) - sort order: + - Map-reduce partition columns: '105' (type: string) - Statistics: Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE - TableScan - Reduce Output Operator - key expressions: '105' (type: string) - sort order: + - Map-reduce partition columns: '105' (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: '105' (type: string), 'val_105' (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-3 Map Reduce Map Operator Tree: diff --git ql/src/test/results/clientpositive/join29.q.out ql/src/test/results/clientpositive/join29.q.out index 6d650e3..3b51652 100644 --- ql/src/test/results/clientpositive/join29.q.out +++ ql/src/test/results/clientpositive/join29.q.out @@ -33,9 +33,9 @@ STAGE DEPENDENCIES: Stage-5 depends on stages: Stage-8 Stage-0 depends on stages: Stage-2, Stage-5, Stage-6 Stage-3 depends on stages: Stage-0 + Stage-2 Stage-9 has a backup stage: Stage-2 Stage-6 depends on stages: Stage-9 - Stage-2 Stage-4 is a root stage STAGE PLANS: @@ -132,44 +132,6 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-9 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_0:$INTNAME - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_0:$INTNAME - TableScan - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - - Stage: Stage-6 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2, _col3 - Select Operator - expressions: _col2 (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - Local Work: - Map Reduce Local Work - Stage: Stage-2 Map Reduce Map Operator Tree: @@ -209,6 +171,44 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Stage: Stage-9 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:$INTNAME 
+ Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:$INTNAME + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3 + Select Operator + expressions: _col2 (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + Local Work: + Map Reduce Local Work + Stage: Stage-4 Map Reduce Map Operator Tree: diff --git ql/src/test/results/clientpositive/join31.q.out ql/src/test/results/clientpositive/join31.q.out index 9248cd9..4c541f6 100644 --- ql/src/test/results/clientpositive/join31.q.out +++ ql/src/test/results/clientpositive/join31.q.out @@ -36,9 +36,9 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2, Stage-6, Stage-7 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-2 Stage-10 has a backup stage: Stage-2 Stage-7 depends on stages: Stage-10 - Stage-2 Stage-5 is a root stage STAGE PLANS: @@ -166,6 +166,47 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-10 Map Reduce Local Work Alias -> Map Local Tables: @@ -208,47 +249,6 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - 
Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-5 Map Reduce Map Operator Tree: diff --git ql/src/test/results/clientpositive/multiMapJoin1.q.out ql/src/test/results/clientpositive/multiMapJoin1.q.out index 0a64c79..00876d7 100644 --- ql/src/test/results/clientpositive/multiMapJoin1.q.out +++ ql/src/test/results/clientpositive/multiMapJoin1.q.out @@ -775,18 +775,18 @@ STAGE DEPENDENCIES: Stage-22 has a backup stage: Stage-4 Stage-10 depends on stages: Stage-22 Stage-5 depends on stages: Stage-4, Stage-10, Stage-11 + Stage-4 Stage-23 has a backup stage: Stage-4 Stage-11 depends on stages: Stage-23 - Stage-4 + Stage-3 Stage-25 has a backup stage: Stage-3 Stage-14 depends on stages: Stage-25 - Stage-3 + Stage-2 Stage-27 has a backup stage: Stage-2 Stage-17 depends on stages: Stage-27 - Stage-2 + Stage-1 Stage-29 has a backup stage: Stage-1 Stage-20 depends on stages: Stage-29 - Stage-1 Stage-0 depends on stages: Stage-5 STAGE PLANS: @@ -995,50 +995,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-23 - Map Reduce Local Work - Alias -> Map Local Tables: - $INTNAME - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $INTNAME - TableScan - HashTable Sink Operator - keys: - 0 _col2 (type: string) - 1 key (type: string) - - Stage: Stage-11 - Map Reduce - Map Operator Tree: - TableScan - alias: smalltbl4 - Filter Operator - predicate: key is not null (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col7 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col7, _col5, _col6 - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)), sum(hash(_col4)), sum(hash(_col7)), sum(hash(_col5)), sum(hash(_col6)) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Local Work: - Map Reduce Local Work - Stage: Stage-4 Map Reduce Map Operator Tree: @@ -1085,43 +1041,47 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-25 + Stage: Stage-23 Map Reduce Local Work Alias -> Map Local Tables: - join3:$INTNAME + $INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - join3:$INTNAME + $INTNAME TableScan HashTable Sink Operator keys: - 0 _col1 (type: string) + 0 _col2 (type: string) 1 key (type: string) - Stage: Stage-14 + Stage: Stage-11 Map Reduce Map Operator Tree: TableScan - alias: smalltbl3 + alias: smalltbl4 Filter Operator predicate: key is not null (type: boolean) Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) + 0 _col2 (type: string) 1 key (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col4 (type: string), _col5 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col7 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col7, _col5, _col6 + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)), sum(hash(_col4)), sum(hash(_col7)), sum(hash(_col5)), sum(hash(_col6)) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -1166,37 +1126,37 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-27 + Stage: Stage-25 Map Reduce Local Work Alias -> Map Local Tables: - join3:join2:$INTNAME + join3:$INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - join3:join2:$INTNAME + join3:$INTNAME TableScan HashTable Sink Operator keys: - 0 _col3 (type: string) - 1 value (type: string) + 0 _col1 (type: string) + 1 key (type: string) - Stage: Stage-17 + Stage: Stage-14 Map Reduce Map Operator Tree: TableScan - alias: smalltbl2 + alias: smalltbl3 Filter Operator - predicate: value is not null (type: boolean) + predicate: key is not null (type: boolean) Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: string) - 1 value (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + 0 _col1 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: string), _col3 (type: string), _col4 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col4 
(type: string), _col5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 File Output Operator compressed: false table: @@ -1248,40 +1208,37 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-29 + Stage: Stage-27 Map Reduce Local Work Alias -> Map Local Tables: - join3:join2:join1:bigtbl + join3:join2:$INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - join3:join2:join1:bigtbl + join3:join2:$INTNAME TableScan - alias: bigtbl - Filter Operator - predicate: ((key1 is not null and value is not null) and key2 is not null) (type: boolean) - HashTable Sink Operator - keys: - 0 key1 (type: string) - 1 key (type: string) + HashTable Sink Operator + keys: + 0 _col3 (type: string) + 1 value (type: string) - Stage: Stage-20 + Stage: Stage-17 Map Reduce Map Operator Tree: TableScan - alias: smalltbl1 + alias: smalltbl2 Filter Operator - predicate: key is not null (type: boolean) + predicate: value is not null (type: boolean) Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 key1 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col2, _col6 + 0 _col3 (type: string) + 1 value (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string), _col2 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: string), _col3 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false table: @@ -1337,6 +1294,49 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-29 + Map Reduce Local Work + Alias -> Map Local Tables: + join3:join2:join1:bigtbl + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + join3:join2:join1:bigtbl + TableScan + alias: bigtbl + Filter Operator + predicate: ((key1 is not null and value is not null) and key2 is not null) (type: boolean) + HashTable Sink Operator + keys: + 0 key1 (type: string) + 1 key (type: string) + + Stage: Stage-20 + Map Reduce + Map Operator Tree: + TableScan + alias: smalltbl1 + Filter Operator + predicate: key is not null (type: boolean) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key1 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col2, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string), _col2 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + Stage: Stage-0 Fetch Operator limit: -1 @@ -2161,18 +2161,18 @@ STAGE DEPENDENCIES: Stage-22 has a backup stage: Stage-4 Stage-10 depends on stages: Stage-22 Stage-5 depends on stages: Stage-4, Stage-10, Stage-11 + Stage-4 Stage-23 has a backup stage: Stage-4 Stage-11 depends on stages: Stage-23 - Stage-4 + Stage-3 Stage-25 has a backup stage: Stage-3 
Stage-14 depends on stages: Stage-25 - Stage-3 + Stage-2 Stage-27 has a backup stage: Stage-2 Stage-17 depends on stages: Stage-27 - Stage-2 + Stage-1 Stage-29 has a backup stage: Stage-1 Stage-20 depends on stages: Stage-29 - Stage-1 Stage-0 depends on stages: Stage-5 STAGE PLANS: @@ -2381,50 +2381,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-23 - Map Reduce Local Work - Alias -> Map Local Tables: - $INTNAME - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $INTNAME - TableScan - HashTable Sink Operator - keys: - 0 _col2 (type: string) - 1 key (type: string) - - Stage: Stage-11 - Map Reduce - Map Operator Tree: - TableScan - alias: smalltbl4 - Filter Operator - predicate: key is not null (type: boolean) - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col7 (type: string), _col5 (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col7, _col5, _col6 - Group By Operator - aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)), sum(hash(_col4)), sum(hash(_col7)), sum(hash(_col5)), sum(hash(_col6)) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Local Work: - Map Reduce Local Work - Stage: Stage-4 Map Reduce Map Operator Tree: @@ -2471,43 +2427,47 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-25 + Stage: Stage-23 Map Reduce Local Work Alias -> Map Local Tables: - join3:$INTNAME + $INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - join3:$INTNAME + $INTNAME TableScan HashTable Sink Operator keys: - 0 _col1 (type: string) + 0 _col2 (type: string) 1 key (type: string) - Stage: Stage-14 + Stage: Stage-11 Map Reduce Map Operator Tree: TableScan - alias: smalltbl3 + alias: smalltbl4 Filter Operator predicate: key is not null (type: boolean) Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) + 0 _col2 (type: string) 1 key (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col4 (type: string), _col5 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col7 (type: string), _col5 (type: string), _col6 
(type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col7, _col5, _col6 + Group By Operator + aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)), sum(hash(_col4)), sum(hash(_col7)), sum(hash(_col5)), sum(hash(_col6)) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -2552,37 +2512,37 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-27 + Stage: Stage-25 Map Reduce Local Work Alias -> Map Local Tables: - join3:join2:$INTNAME + join3:$INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - join3:join2:$INTNAME + join3:$INTNAME TableScan HashTable Sink Operator keys: - 0 _col3 (type: string) - 1 value (type: string) + 0 _col1 (type: string) + 1 key (type: string) - Stage: Stage-17 + Stage: Stage-14 Map Reduce Map Operator Tree: TableScan - alias: smalltbl2 + alias: smalltbl3 Filter Operator - predicate: value is not null (type: boolean) + predicate: key is not null (type: boolean) Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: string) - 1 value (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + 0 _col1 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: string), _col3 (type: string), _col4 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col4 (type: string), _col5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 File Output Operator compressed: false table: @@ -2634,40 +2594,37 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-29 + Stage: Stage-27 Map Reduce Local Work Alias -> Map Local Tables: - join3:join2:join1:bigtbl + join3:join2:$INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - join3:join2:join1:bigtbl + join3:join2:$INTNAME TableScan - alias: bigtbl - Filter Operator - predicate: ((key1 is not null and value is not null) and key2 is not null) (type: boolean) - HashTable Sink Operator - keys: - 0 key1 (type: string) - 1 key (type: string) + HashTable Sink Operator + keys: + 0 _col3 (type: string) + 1 value (type: string) - Stage: Stage-20 + Stage: Stage-17 Map Reduce Map Operator Tree: TableScan - alias: smalltbl1 + alias: smalltbl2 Filter Operator - predicate: key is not null (type: boolean) + predicate: value is not null (type: boolean) Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 key1 (type: string) - 1 key (type: string) - outputColumnNames: _col0, _col1, _col2, _col6 + 0 _col3 (type: string) + 1 value (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string), _col2 (type: string), _col2 (type: string) - 
outputColumnNames: _col0, _col1, _col2, _col3, _col4 + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: string), _col3 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false table: @@ -2723,6 +2680,49 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-29 + Map Reduce Local Work + Alias -> Map Local Tables: + join3:join2:join1:bigtbl + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + join3:join2:join1:bigtbl + TableScan + alias: bigtbl + Filter Operator + predicate: ((key1 is not null and value is not null) and key2 is not null) (type: boolean) + HashTable Sink Operator + keys: + 0 key1 (type: string) + 1 key (type: string) + + Stage: Stage-20 + Map Reduce + Map Operator Tree: + TableScan + alias: smalltbl1 + Filter Operator + predicate: key is not null (type: boolean) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key1 (type: string) + 1 key (type: string) + outputColumnNames: _col0, _col1, _col2, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string), _col2 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/multiMapJoin2.q.out ql/src/test/results/clientpositive/multiMapJoin2.q.out index b0bdfbd..24ffc95 100644 --- ql/src/test/results/clientpositive/multiMapJoin2.q.out +++ ql/src/test/results/clientpositive/multiMapJoin2.q.out @@ -750,9 +750,9 @@ STAGE DEPENDENCIES: Stage-10 depends on stages: Stage-15 Stage-4 depends on stages: Stage-3, Stage-10, Stage-11 Stage-5 depends on stages: Stage-4 + Stage-3 Stage-16 has a backup stage: Stage-3 Stage-11 depends on stages: Stage-16 - Stage-3 Stage-18 is a root stage Stage-8 depends on stages: Stage-18 Stage-0 depends on stages: Stage-5 @@ -914,6 +914,43 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input 
format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-16 Map Reduce Local Work Alias -> Map Local Tables: @@ -953,43 +990,6 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-18 Map Reduce Local Work Alias -> Map Local Tables: @@ -1438,9 +1438,9 @@ STAGE DEPENDENCIES: Stage-8 depends on stages: Stage-12 Stage-4 depends on stages: Stage-3, Stage-8, Stage-9 Stage-5 depends on stages: Stage-4 + Stage-3 Stage-13 has a backup stage: Stage-3 Stage-9 depends on stages: Stage-13 - Stage-3 Stage-14 is a root stage Stage-2 depends on stages: Stage-14 Stage-0 depends on stages: Stage-5 @@ -1572,6 +1572,47 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-13 Map 
Reduce Local Work Alias -> Map Local Tables: @@ -1614,47 +1655,6 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1 - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-14 Map Reduce Local Work Alias -> Map Local Tables: diff --git ql/src/test/results/clientpositive/spark/auto_join0.q.out ql/src/test/results/clientpositive/spark/auto_join0.q.out index 7f8eb63..470eef4 100644 --- ql/src/test/results/clientpositive/spark/auto_join0.q.out +++ ql/src/test/results/clientpositive/spark/auto_join0.q.out @@ -1,4 +1,4 @@ -Warning: Map Join MAPJOIN[20][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[36][bigTable=?] 
in task 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: explain
 select sum(hash(a.k1,a.v1,a.k2, a.v2))
 from (
@@ -22,12 +22,13 @@ SELECT src1.key as k1, src1.value as v1,
 ) a
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-2 is a root stage
-  Stage-1 depends on stages: Stage-2
+  Stage-3 is a root stage has a backup stage: Stage-2
+  Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2
+  Stage-2
   Stage-0 depends on stages: Stage-1

 STAGE PLANS:
-  Stage: Stage-2
+  Stage: Stage-3
     Spark
 #### A masked pattern was here ####
       Vertices:
@@ -119,13 +120,82 @@ STAGE PLANS:
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

+  Stage: Stage-2
+    Spark
+      Edges:
+        Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 9 (PARTITION-LEVEL SORT, 1)
+        Reducer 7 <- Reducer 6 (SORT, 2)
+        Reducer 8 <- Reducer 7 (GROUP, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 5
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Filter Operator
+                    predicate: (key < 10) (type: boolean)
+                    Select Operator
+                      expressions: key (type: string), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Reduce Output Operator
+                        sort order:
+                        value expressions: _col0 (type: string), _col1 (type: string)
+        Map 9
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Filter Operator
+                    predicate: (key < 10) (type: boolean)
+                    Select Operator
+                      expressions: key (type: string), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Reduce Output Operator
+                        sort order:
+                        value expressions: _col0 (type: string), _col1 (type: string)
+        Reducer 6
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+                  sort order: ++++
+        Reducer 7
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Group By Operator
+                  aggregations: sum(hash(_col0,_col1,_col2,_col3))
+                  mode: hash
+                  outputColumnNames: _col0
+                  Reduce Output Operator
+                    sort order:
+                    value expressions: _col0 (type: bigint)
+        Reducer 8
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Select Operator
+                  expressions: _col0 (type: bigint)
+                  outputColumnNames: _col0
+                  File Output Operator
+                    compressed: false
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
   Stage: Stage-0
     Fetch Operator
       limit: -1
       Processor Tree:
         ListSink

-Warning: Map Join MAPJOIN[20][bigTable=?] in task 'Stage-1:MAPRED' is a cross product
+Warning: Map Join MAPJOIN[36][bigTable=?]
in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: select sum(hash(a.k1,a.v1,a.k2, a.v2)) from ( SELECT src1.key as k1, src1.value as v1, diff --git ql/src/test/results/clientpositive/spark/auto_join1.q.out ql/src/test/results/clientpositive/spark/auto_join1.q.out index b640b9d..6b2783b 100644 --- ql/src/test/results/clientpositive/spark/auto_join1.q.out +++ ql/src/test/results/clientpositive/spark/auto_join1.q.out @@ -15,13 +15,14 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-4 is a root stage has a backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -90,6 +91,50 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Spark + Edges: + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: src1 + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: src2 + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col6 + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/auto_join10.q.out ql/src/test/results/clientpositive/spark/auto_join10.q.out index f01765c..3072bce 100644 --- ql/src/test/results/clientpositive/spark/auto_join10.q.out +++ ql/src/test/results/clientpositive/spark/auto_join10.q.out @@ -15,12 +15,13 @@ ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -101,6 +102,69 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: src + Filter 
Operator + predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: src + Filter Operator + predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col2, _col3 + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Reducer 6 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/auto_join11.q.out ql/src/test/results/clientpositive/spark/auto_join11.q.out index 69c10e6..7aded68 100644 --- ql/src/test/results/clientpositive/spark/auto_join11.q.out +++ ql/src/test/results/clientpositive/spark/auto_join11.q.out @@ -15,12 +15,13 @@ JOIN ON src1.c1 = src2.c3 AND src1.c1 < 100 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -101,6 +102,69 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: src + Filter Operator + predicate: (key < 100) (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: src + Filter Operator + predicate: (key < 100) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col3 + Group By Operator + aggregations: sum(hash(_col0,_col3)) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value 
expressions: _col0 (type: bigint) + Reducer 6 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/auto_join12.q.out ql/src/test/results/clientpositive/spark/auto_join12.q.out index bc763ed..6126bbb 100644 --- ql/src/test/results/clientpositive/spark/auto_join12.q.out +++ ql/src/test/results/clientpositive/spark/auto_join12.q.out @@ -21,12 +21,13 @@ JOIN ON src1.c1 = src3.c5 AND src3.c5 < 80 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -143,6 +144,100 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 9 <- Map 10 (PARTITION-LEVEL SORT, 2), Map 8 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Reducer 9 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Reducer 6 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 10 + Map Operator Tree: + TableScan + alias: src + Filter Operator + predicate: ((UDFToDouble(key) < 100.0) and (UDFToDouble(key) < 80.0)) (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: src + Filter Operator + predicate: (((UDFToDouble(key) < 80.0) and (UDFToDouble(key) < 100.0)) and key is not null) (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Map 8 + Map Operator Tree: + TableScan + alias: src + Filter Operator + predicate: ((UDFToDouble(key) < 100.0) and (UDFToDouble(key) < 80.0)) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: string) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col1, _col3 + Select Operator + expressions: hash(_col1,_col3) (type: int) + outputColumnNames: _col0 + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Reducer 7 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: 
false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 9 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col1, _col2 + Select Operator + expressions: _col2 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col2 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col2 (type: string) + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/auto_join13.q.out ql/src/test/results/clientpositive/spark/auto_join13.q.out index 935ebf5..239c693 100644 --- ql/src/test/results/clientpositive/spark/auto_join13.q.out +++ ql/src/test/results/clientpositive/spark/auto_join13.q.out @@ -21,12 +21,13 @@ JOIN ON src1.c1 + src2.c3 = src3.c5 AND src3.c5 < 200 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -142,6 +143,99 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 9 <- Map 10 (PARTITION-LEVEL SORT, 2), Map 8 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Reducer 9 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Reducer 6 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 10 + Map Operator Tree: + TableScan + alias: src + Filter Operator + predicate: (UDFToDouble(key) < 100.0) (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: src + Filter Operator + predicate: ((UDFToDouble(key) < 200.0) and UDFToDouble(key) is not null) (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Map 8 + Map Operator Tree: + TableScan + alias: src + Filter Operator + predicate: (UDFToDouble(key) < 100.0) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: string) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col2, _col3 + Select Operator + expressions: hash(_col3,_col2) (type: int) + outputColumnNames: _col0 + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Reducer 7 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 
(type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 9 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col2 + Filter Operator + predicate: (UDFToDouble(_col2) + UDFToDouble(_col0)) is not null (type: boolean) + Reduce Output Operator + key expressions: (UDFToDouble(_col2) + UDFToDouble(_col0)) (type: double) + sort order: + + Map-reduce partition columns: (UDFToDouble(_col2) + UDFToDouble(_col0)) (type: double) + value expressions: _col1 (type: string), _col2 (type: string) + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/auto_join14.q.out ql/src/test/results/clientpositive/spark/auto_join14.q.out index 830314e..116ca23 100644 --- ql/src/test/results/clientpositive/spark/auto_join14.q.out +++ ql/src/test/results/clientpositive/spark/auto_join14.q.out @@ -19,13 +19,14 @@ FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-4 is a root stage has a backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -94,6 +95,50 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Spark + Edges: + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: src + Filter Operator + predicate: (key > 100) (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: srcpart + Filter Operator + predicate: ((key > 100) and key is not null) (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col6 + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + PREHOOK: query: FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value PREHOOK: type: QUERY @@ -101,6 +146,8 @@ PREHOOK: Input: default@src PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Output: default@dest1 POSTHOOK: query: FROM src JOIN srcpart ON src.key = srcpart.key AND 
srcpart.ds = '2008-04-08' and src.key > 100 INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value @@ -109,6 +156,8 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Output: default@dest1 POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c2 SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] diff --git ql/src/test/results/clientpositive/spark/auto_join15.q.out ql/src/test/results/clientpositive/spark/auto_join15.q.out index 780540b..13bf31f 100644 --- ql/src/test/results/clientpositive/spark/auto_join15.q.out +++ ql/src/test/results/clientpositive/spark/auto_join15.q.out @@ -15,12 +15,13 @@ SORT BY k1, v1, k2, v2 ) a POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -108,6 +109,76 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 9 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Reducer 6 (SORT, 2) + Reducer 8 <- Reducer 7 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 5 + Map Operator Tree: + TableScan + alias: src1 + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Map 9 + Map Operator Tree: + TableScan + alias: src2 + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order: ++++ + Reducer 7 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Group By Operator + aggregations: sum(hash(_col0,_col1,_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Reducer 8 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat 
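
All of these spark q.out updates follow the same pattern: the map-join stages are renumbered one up (Stage-2 becomes Stage-3, and so on), each converted stage is annotated with "has a backup stage: Stage-N", and the preserved common-join plan is printed as that backup stage, listed in STAGE DEPENDENCIES without any "depends on" edge because it only runs when the fallback is taken. The sketch below shows how such a driver-side fallback can work; it is a simplified illustration assuming the Task API used elsewhere in this patch (getBackupTask, getAndInitBackupTask, executeTask), not the actual Hive Driver loop.

import java.io.Serializable;

import org.apache.hadoop.hive.ql.exec.Task;

public class BackupTaskFallbackSketch {
  // Simplified fallback: run the optimized (map-join) task first; if it
  // fails and a backup was wired in, initialize and run the backup, which
  // carries the original common-join plan. The "ATTEMPT: Execute BackupTask"
  // line matches what auto_join25.q.out records further below when the
  // converted SparkTask fails.
  static int runWithFallback(Task<? extends Serializable> task) {
    int exitVal = task.executeTask();
    if (exitVal != 0 && task.getBackupTask() != null) {
      Task<? extends Serializable> backup = task.getAndInitBackupTask();
      System.err.println("ATTEMPT: Execute BackupTask: " + backup.getClass().getName());
      exitVal = backup.executeTask();
    }
    return exitVal;
  }
}
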
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/auto_join16.q.out ql/src/test/results/clientpositive/spark/auto_join16.q.out index f705339..e9752fd 100644 --- ql/src/test/results/clientpositive/spark/auto_join16.q.out +++ ql/src/test/results/clientpositive/spark/auto_join16.q.out @@ -15,12 +15,13 @@ ON (subq.key = tab.key and subq.key > 20 and subq.value = tab.value) where tab.value < 200 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -97,6 +98,65 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: ((((key > 10) and value is not null) and (key > 20)) and (value < 200)) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: tab + Filter Operator + predicate: ((((key > 20) and value is not null) and key is not null) and (value < 200)) (type: boolean) + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col3 + Group By Operator + aggregations: sum(hash(_col0,_col3)) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Reducer 6 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/auto_join17.q.out ql/src/test/results/clientpositive/spark/auto_join17.q.out index 3144db6..67dabd8 100644 --- ql/src/test/results/clientpositive/spark/auto_join17.q.out +++ ql/src/test/results/clientpositive/spark/auto_join17.q.out @@ -15,13 +15,14 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.*, src2.* POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-4 is a root stage has a backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: 
Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -90,6 +91,51 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Spark + Edges: + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: src1 + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: src2 + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col5) (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.*, src2.* PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/auto_join19.q.out ql/src/test/results/clientpositive/spark/auto_join19.q.out index f2b0140..51dac00 100644 --- ql/src/test/results/clientpositive/spark/auto_join19.q.out +++ ql/src/test/results/clientpositive/spark/auto_join19.q.out @@ -17,13 +17,14 @@ INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-4 is a root stage has a backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -92,6 +93,50 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Spark + Edges: + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: src1 + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: src2 + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col8 + Select Operator + expressions: UDFToInteger(_col0) (type: int), 
_col8 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') diff --git ql/src/test/results/clientpositive/spark/auto_join2.q.out ql/src/test/results/clientpositive/spark/auto_join2.q.out index 2424cca..b752122 100644 --- ql/src/test/results/clientpositive/spark/auto_join2.q.out +++ ql/src/test/results/clientpositive/spark/auto_join2.q.out @@ -15,14 +15,15 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key INSERT OVERWRITE TABLE dest_j2 SELECT src1.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-3 depends on stages: Stage-4 - Stage-1 depends on stages: Stage-3 + Stage-5 is a root stage has a backup stage: Stage-3 + Stage-4 depends on stages: Stage-5 has a backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 STAGE PLANS: - Stage: Stage-4 + Stage: Stage-5 Spark #### A masked pattern was here #### Vertices: @@ -45,7 +46,7 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -135,6 +136,83 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 8 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: src1 + Filter Operator + predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: src1 + Filter Operator + predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Map 8 + Map Operator Tree: + TableScan + alias: src1 + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + value expressions: _col1 (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1 + Filter Operator + predicate: (UDFToDouble(_col0) + UDFToDouble(_col1)) is not null (type: boolean) + Reduce Output Operator + key expressions: (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double) + sort order: + + Map-reduce partition columns: (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double) + value expressions: _col0 (type: string) + Reducer 6 + 
Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col3 + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col3 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j2 + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key) INSERT OVERWRITE TABLE dest_j2 SELECT src1.key, src3.value PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/auto_join20.q.out ql/src/test/results/clientpositive/spark/auto_join20.q.out index 9258f3b..ae32e4a 100644 --- ql/src/test/results/clientpositive/spark/auto_join20.q.out +++ ql/src/test/results/clientpositive/spark/auto_join20.q.out @@ -15,12 +15,13 @@ SORT BY k1,v1,k2,v2,k3,v3 )a POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -136,6 +137,90 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 7 <- Map 10 (PARTITION-LEVEL SORT, 2), Map 11 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2) + Reducer 8 <- Reducer 7 (SORT, 2) + Reducer 9 <- Reducer 8 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 10 + Map Operator Tree: + TableScan + alias: src2 + Filter Operator + predicate: (key < 10) (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Map 11 + Map Operator Tree: + TableScan + alias: src3 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: src1 + Filter Operator + predicate: (key < 10) (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Right Outer Join0 to 2 + filter predicates: + 0 + 1 + 2 {(KEY.reducesinkkey0 < 20)} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Reducer 8 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), 
KEY.reducesinkkey5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Group By Operator + aggregations: sum(hash(_col0,_col1,_col2,_col3,_col4,_col5)) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Reducer 9 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -178,12 +263,13 @@ SORT BY k1,v1,k2,v2,k3,v3 )a POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -299,6 +385,90 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 7 <- Map 10 (PARTITION-LEVEL SORT, 2), Map 11 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2) + Reducer 8 <- Reducer 7 (SORT, 2) + Reducer 9 <- Reducer 8 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 10 + Map Operator Tree: + TableScan + alias: src2 + Filter Operator + predicate: ((key < 10) and (key < 15)) (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Map 11 + Map Operator Tree: + TableScan + alias: src3 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: src1 + Filter Operator + predicate: ((key < 15) and (key < 10)) (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Right Outer Join0 to 2 + filter predicates: + 0 + 1 + 2 {(KEY.reducesinkkey0 < 20)} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Reducer 8 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Group By Operator + aggregations: 
sum(hash(_col0,_col1,_col2,_col3,_col4,_col5)) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Reducer 9 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/auto_join21.q.out ql/src/test/results/clientpositive/spark/auto_join21.q.out index aa8f6dd..0f1dd6d 100644 --- ql/src/test/results/clientpositive/spark/auto_join21.q.out +++ ql/src/test/results/clientpositive/spark/auto_join21.q.out @@ -9,12 +9,13 @@ explain SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -106,6 +107,71 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 8 (PARTITION-LEVEL SORT, 2), Map 9 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Reducer 6 (SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 5 + Map Operator Tree: + TableScan + alias: src1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Map 8 + Map Operator Tree: + TableScan + alias: src2 + Filter Operator + predicate: (key > 10) (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Map 9 + Map Operator Tree: + TableScan + alias: src3 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Right Outer Join1 to 2 + filter predicates: + 0 {(KEY.reducesinkkey0 < 10)} + 1 + 2 {(KEY.reducesinkkey0 < 10)} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Reducer 7 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), 
KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/auto_join22.q.out ql/src/test/results/clientpositive/spark/auto_join22.q.out index d49dda9..2f113b5 100644 --- ql/src/test/results/clientpositive/spark/auto_join22.q.out +++ ql/src/test/results/clientpositive/spark/auto_join22.q.out @@ -5,12 +5,13 @@ POSTHOOK: query: explain SELECT sum(hash(src5.src1_value)) FROM (SELECT src3.*, src4.value as src4_value, src4.key as src4_key FROM src src4 JOIN (SELECT src2.*, src1.key as src1_key, src1.value as src1_value FROM src src1 JOIN src src2 ON src1.key = src2.key) src3 ON src3.src1_key = src4.key) src5 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -123,6 +124,97 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 9 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Map 10 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 8 <- Reducer 7 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 10 + Map Operator Tree: + TableScan + alias: src4 + Filter Operator + predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: src4 + Filter Operator + predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: string) + Map 9 + Map Operator Tree: + TableScan + alias: src4 + Filter Operator + predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: string) + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col1 + Select Operator + expressions: hash(_col1) (type: int) + outputColumnNames: _col0 + Group By Operator + aggregations: sum(_col0) 
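
Two side effects of this approach are visible in the warnings and stage lists. Operator IDs jump (MAPJOIN[20] became MAPJOIN[36] in auto_join0 above, and MAPJOIN[12] becomes MAPJOIN[23] in auto_join23 below), which is consistent with the operator tree being cloned before the join is converted: clones receive fresh IDs. And every task split out of the original SparkTask carries the same backup wiring, which is what the repeated "has a backup stage" annotations reflect. The helper below is purely illustrative (an assumption for this note, not part of the patch): it derives those annotation lines from a task graph.

import java.io.Serializable;
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.hadoop.hive.ql.exec.Task;

public class BackupStagePrinter {
  // Breadth-first walk over the task DAG, printing one line per task that
  // has a backup, mirroring the "Stage-X ... has a backup stage: Stage-Y"
  // lines in the STAGE DEPENDENCIES sections of these q.out files.
  static void printBackupStages(List<Task<? extends Serializable>> rootTasks) {
    Deque<Task<? extends Serializable>> pending =
        new ArrayDeque<Task<? extends Serializable>>(rootTasks);
    Set<Task<? extends Serializable>> visited =
        new HashSet<Task<? extends Serializable>>();
    while (!pending.isEmpty()) {
      Task<? extends Serializable> task = pending.poll();
      if (!visited.add(task)) {
        continue; // it is a DAG: a task can be reachable from several parents
      }
      if (task.getBackupTask() != null) {
        System.out.println(task.getId() + " has a backup stage: "
            + task.getBackupTask().getId());
      }
      if (task.getChildTasks() != null) {
        pending.addAll(task.getChildTasks());
      }
    }
  }
}
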
+ mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Reducer 8 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/auto_join23.q.out ql/src/test/results/clientpositive/spark/auto_join23.q.out index a179d87..017d405 100644 --- ql/src/test/results/clientpositive/spark/auto_join23.q.out +++ ql/src/test/results/clientpositive/spark/auto_join23.q.out @@ -1,4 +1,4 @@ -Warning: Map Join MAPJOIN[12][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[23][bigTable=?] in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: -- SORT_QUERY_RESULTS explain @@ -10,12 +10,13 @@ explain SELECT * FROM src src1 JOIN src src2 WHERE src1.key < 10 and src2.key < 10 SORT BY src1.key, src1.value, src2.key, src2.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -82,13 +83,62 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) + Reducer 6 <- Reducer 5 (SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: src1 + Filter Operator + predicate: (key < 10) (type: boolean) + Reduce Output Operator + sort order: + value expressions: key (type: string), value (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: src2 + Filter Operator + predicate: (key < 10) (type: boolean) + Reduce Output Operator + sort order: + value expressions: key (type: string), value (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order: ++++ + Reducer 6 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: ListSink -Warning: Map Join MAPJOIN[12][bigTable=?] 
in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[23][bigTable=?] in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: SELECT * FROM src src1 JOIN src src2 WHERE src1.key < 10 and src2.key < 10 SORT BY src1.key, src1.value, src2.key, src2.value PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git ql/src/test/results/clientpositive/spark/auto_join24.q.out ql/src/test/results/clientpositive/spark/auto_join24.q.out index cfb076e..541d45c 100644 --- ql/src/test/results/clientpositive/spark/auto_join24.q.out +++ ql/src/test/results/clientpositive/spark/auto_join24.q.out @@ -25,12 +25,13 @@ POSTHOOK: query: explain SELECT sum(a.cnt) FROM tst1 a JOIN tst1 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -103,6 +104,63 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: cnt (type: int) + Map 7 + Map Operator Tree: + TableScan + alias: b + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col1 + Group By Operator + aggregations: sum(_col1) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Reducer 6 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/auto_join25.q.out ql/src/test/results/clientpositive/spark/auto_join25.q.out index ab01b8a..4c39927 100644 --- ql/src/test/results/clientpositive/spark/auto_join25.q.out +++ ql/src/test/results/clientpositive/spark/auto_join25.q.out @@ -20,6 +20,9 @@ PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Output: default@dest1 +Status: Failed +FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.spark.SparkTask +ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.spark.SparkTask POSTHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, 
src2.value where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') @@ -33,7 +36,8 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Output: default@dest1 POSTHOOK: Lineage: dest1.key EXPRESSION [(srcpart)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] -RUN: Stage-1:MAPRED +RUN: Stage-4:MAPRED +RUN: Stage-3:MAPRED RUN: Stage-0:MOVE RUN: Stage-2:STATS PREHOOK: query: SELECT sum(hash(dest1.key,dest1.value)) FROM dest1 @@ -60,14 +64,18 @@ INSERT OVERWRITE TABLE dest_j2 SELECT src1.key, src3.value PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@dest_j2 +Status: Failed +FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.spark.SparkTask +ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.spark.SparkTask POSTHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key) INSERT OVERWRITE TABLE dest_j2 SELECT src1.key, src3.value POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@dest_j2 POSTHOOK: Lineage: dest_j2.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j2.value SIMPLE [(src)src3.FieldSchema(name:value, type:string, comment:default), ] -RUN: Stage-1:MAPRED +POSTHOOK: Lineage: dest_j2.value SIMPLE [(src)src1.FieldSchema(name:value, type:string, comment:default), ] +RUN: Stage-5:MAPRED +RUN: Stage-3:MAPRED RUN: Stage-0:MOVE RUN: Stage-2:STATS PREHOOK: query: SELECT sum(hash(dest_j2.key,dest_j2.value)) FROM dest_j2 @@ -94,6 +102,9 @@ INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@dest_j1 +Status: Failed +FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.spark.SparkTask +ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.spark.SparkTask POSTHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value POSTHOOK: type: QUERY @@ -101,7 +112,8 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dest_j1 POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] -RUN: Stage-1:MAPRED +RUN: Stage-4:MAPRED +RUN: Stage-3:MAPRED RUN: Stage-0:MOVE RUN: Stage-2:STATS PREHOOK: query: SELECT sum(hash(dest_j1.key,dest_j1.value)) FROM dest_j1 diff --git ql/src/test/results/clientpositive/spark/auto_join26.q.out ql/src/test/results/clientpositive/spark/auto_join26.q.out index 58821e9..32a3dfe 100644 --- ql/src/test/results/clientpositive/spark/auto_join26.q.out +++ ql/src/test/results/clientpositive/spark/auto_join26.q.out @@ -19,13 +19,14 @@ INSERT OVERWRITE TABLE dest_j1 SELECT x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-4 is a root stage has a backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -116,6 +117,67 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr 
Operator + Stage: Stage-3 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: x + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: y + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: bigint) + Reducer 6 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/auto_join28.q.out ql/src/test/results/clientpositive/spark/auto_join28.q.out index d30133b..e3525a3 100644 --- ql/src/test/results/clientpositive/spark/auto_join28.q.out +++ ql/src/test/results/clientpositive/spark/auto_join28.q.out @@ -5,12 +5,13 @@ POSTHOOK: query: explain SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -102,6 +103,71 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 8 (PARTITION-LEVEL SORT, 2), Map 9 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Reducer 6 (SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 5 + Map Operator Tree: + TableScan + alias: src1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Map 8 + Map Operator Tree: + TableScan + alias: src2 + Filter Operator + predicate: (key > 10) (type: boolean) + Reduce Output Operator + 
key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Map 9 + Map Operator Tree: + TableScan + alias: src3 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Right Outer Join1 to 2 + filter predicates: + 0 {(KEY.reducesinkkey0 < 10)} + 1 + 2 {(KEY.reducesinkkey0 < 10)} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Reducer 7 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -115,12 +181,13 @@ POSTHOOK: query: explain SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -215,6 +282,73 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 8 (PARTITION-LEVEL SORT, 2), Map 9 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Reducer 6 (SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 5 + Map Operator Tree: + TableScan + alias: src1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Map 8 + Map Operator Tree: + TableScan + alias: src2 + Filter Operator + predicate: (key > 10) (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Map 9 + Map Operator Tree: + TableScan + alias: src3 + Filter Operator + predicate: ((key > 10) and (key < 10)) (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value 
(type: string) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Left Outer Join1 to 2 + filter predicates: + 0 {(KEY.reducesinkkey0 < 10)} + 1 + 2 + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Reducer 7 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -228,12 +362,13 @@ POSTHOOK: query: explain SELECT * FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -328,6 +463,73 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 8 (PARTITION-LEVEL SORT, 2), Map 9 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Reducer 6 (SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 5 + Map Operator Tree: + TableScan + alias: src1 + Filter Operator + predicate: (key < 10) (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Map 8 + Map Operator Tree: + TableScan + alias: src2 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Map 9 + Map Operator Tree: + TableScan + alias: src3 + Filter Operator + predicate: (key < 10) (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Left Outer Join1 to 2 + filter predicates: + 0 + 1 {(KEY.reducesinkkey0 > 10)} + 2 + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), 
_col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Reducer 7 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -341,12 +543,13 @@ POSTHOOK: query: explain SELECT * FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -438,6 +641,71 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 8 (PARTITION-LEVEL SORT, 2), Map 9 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Reducer 6 (SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 5 + Map Operator Tree: + TableScan + alias: src1 + Filter Operator + predicate: (key < 10) (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Map 8 + Map Operator Tree: + TableScan + alias: src2 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Map 9 + Map Operator Tree: + TableScan + alias: src3 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Right Outer Join1 to 2 + filter predicates: + 0 + 1 {(KEY.reducesinkkey0 > 10)} + 2 {(KEY.reducesinkkey0 < 10)} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Reducer 7 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), 
KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/auto_join29.q.out ql/src/test/results/clientpositive/spark/auto_join29.q.out index 780c6cb..14d54ad 100644 --- ql/src/test/results/clientpositive/spark/auto_join29.q.out +++ ql/src/test/results/clientpositive/spark/auto_join29.q.out @@ -9,12 +9,13 @@ explain SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -106,6 +107,71 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 8 (PARTITION-LEVEL SORT, 2), Map 9 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Reducer 6 (SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 5 + Map Operator Tree: + TableScan + alias: src1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Map 8 + Map Operator Tree: + TableScan + alias: src2 + Filter Operator + predicate: (key > 10) (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Map 9 + Map Operator Tree: + TableScan + alias: src3 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Right Outer Join1 to 2 + filter predicates: + 0 {(KEY.reducesinkkey0 < 10)} + 1 + 2 {(KEY.reducesinkkey0 < 10)} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Reducer 7 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) + outputColumnNames: _col0, 
_col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -627,12 +693,13 @@ POSTHOOK: query: explain SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -727,6 +794,73 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 8 (PARTITION-LEVEL SORT, 2), Map 9 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Reducer 6 (SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 5 + Map Operator Tree: + TableScan + alias: src1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Map 8 + Map Operator Tree: + TableScan + alias: src2 + Filter Operator + predicate: (key > 10) (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Map 9 + Map Operator Tree: + TableScan + alias: src3 + Filter Operator + predicate: ((key > 10) and (key < 10)) (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Left Outer Join1 to 2 + filter predicates: + 0 {(KEY.reducesinkkey0 < 10)} + 1 + 2 + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Reducer 7 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -1248,12 +1382,13 @@ POSTHOOK: query: explain SELECT * FROM 
src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -1348,6 +1483,73 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 8 (PARTITION-LEVEL SORT, 2), Map 9 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Reducer 6 (SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 5 + Map Operator Tree: + TableScan + alias: src1 + Filter Operator + predicate: (key < 10) (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Map 8 + Map Operator Tree: + TableScan + alias: src2 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Map 9 + Map Operator Tree: + TableScan + alias: src3 + Filter Operator + predicate: (key < 10) (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Left Outer Join1 to 2 + filter predicates: + 0 + 1 {(KEY.reducesinkkey0 > 10)} + 2 + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Reducer 7 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -1881,12 +2083,13 @@ POSTHOOK: query: explain SELECT * FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 
+ Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -1978,6 +2181,71 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 8 (PARTITION-LEVEL SORT, 2), Map 9 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Reducer 6 (SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 5 + Map Operator Tree: + TableScan + alias: src1 + Filter Operator + predicate: (key < 10) (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Map 8 + Map Operator Tree: + TableScan + alias: src2 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Map 9 + Map Operator Tree: + TableScan + alias: src3 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Right Outer Join1 to 2 + filter predicates: + 0 + 1 {(KEY.reducesinkkey0 > 10)} + 2 {(KEY.reducesinkkey0 < 10)} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Reducer 7 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -2511,12 +2779,13 @@ POSTHOOK: query: explain SELECT * FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -2602,6 +2871,71 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 6 <- Map 5 
(PARTITION-LEVEL SORT, 2), Map 8 (PARTITION-LEVEL SORT, 2), Map 9 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Reducer 6 (SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 5 + Map Operator Tree: + TableScan + alias: src1 + Filter Operator + predicate: ((key > 10) and (key < 10)) (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Map 8 + Map Operator Tree: + TableScan + alias: src2 + Filter Operator + predicate: ((key < 10) and (key > 10)) (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Map 9 + Map Operator Tree: + TableScan + alias: src3 + Filter Operator + predicate: ((key > 10) and (key < 10)) (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Left Outer Join1 to 2 + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Reducer 7 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -2623,12 +2957,13 @@ POSTHOOK: query: explain SELECT * FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -2723,6 +3058,73 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 8 (PARTITION-LEVEL SORT, 2), Map 9 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Reducer 6 (SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 5 + Map Operator Tree: + TableScan + alias: src1 + Filter Operator + predicate: ((key > 10) and (key < 10)) (type: boolean) + Reduce Output Operator + key expressions: key (type: 
string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Map 8 + Map Operator Tree: + TableScan + alias: src2 + Filter Operator + predicate: ((key < 10) and (key > 10)) (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Map 9 + Map Operator Tree: + TableScan + alias: src3 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Right Outer Join1 to 2 + filter predicates: + 0 + 1 + 2 {(KEY.reducesinkkey0 < 10)} + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Reducer 7 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -3244,12 +3646,13 @@ POSTHOOK: query: explain SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -3344,6 +3747,73 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 8 (PARTITION-LEVEL SORT, 2), Map 9 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Reducer 6 (SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 5 + Map Operator Tree: + TableScan + alias: src1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Map 8 + Map Operator Tree: + TableScan + alias: src2 + Filter Operator + predicate: ((key < 10) and (key > 10)) (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Map 9 + Map Operator Tree: + 
TableScan + alias: src3 + Filter Operator + predicate: ((key > 10) and (key < 10)) (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Inner Join 1 to 2 + filter predicates: + 0 {(KEY.reducesinkkey0 < 10)} + 1 + 2 + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Reducer 7 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -3365,12 +3835,13 @@ POSTHOOK: query: explain SELECT * FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -3468,6 +3939,75 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 8 (PARTITION-LEVEL SORT, 2), Map 9 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Reducer 6 (SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 5 + Map Operator Tree: + TableScan + alias: src1 + Filter Operator + predicate: (key < 10) (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Map 8 + Map Operator Tree: + TableScan + alias: src2 + Filter Operator + predicate: (key < 10) (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Map 9 + Map Operator Tree: + TableScan + alias: src3 + Filter Operator + predicate: (key < 10) (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer 
Join0 to 1 + Inner Join 1 to 2 + filter predicates: + 0 + 1 {(KEY.reducesinkkey0 > 10)} + 2 + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Reducer 7 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -3511,12 +4051,13 @@ POSTHOOK: query: explain SELECT * FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key > 10) JOIN src src3 ON (src2.key = src3.key AND src3.key < 10) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -3602,6 +4143,71 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 8 (PARTITION-LEVEL SORT, 2), Map 9 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Reducer 6 (SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 5 + Map Operator Tree: + TableScan + alias: src1 + Filter Operator + predicate: ((key > 10) and (key < 10)) (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Map 8 + Map Operator Tree: + TableScan + alias: src2 + Filter Operator + predicate: ((key < 10) and (key > 10)) (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Map 9 + Map Operator Tree: + TableScan + alias: src3 + Filter Operator + predicate: ((key > 10) and (key < 10)) (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Reducer 7 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/auto_join3.q.out ql/src/test/results/clientpositive/spark/auto_join3.q.out index 54e24f3..5d0b9ec 100644 --- ql/src/test/results/clientpositive/spark/auto_join3.q.out +++ ql/src/test/results/clientpositive/spark/auto_join3.q.out @@ -15,13 +15,14 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key INSERT OVERWRITE TABLE dest1 SELECT src1.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-4 is a root stage has a backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -121,6 +122,70 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: src1 + Filter Operator + predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: src1 + Filter Operator + predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: src1 + Filter Operator + predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + outputColumnNames: _col0, _col3 + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col3 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + PREHOOK: query: FROM 
src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key = src3.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src3.value PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/spark/auto_join30.q.out ql/src/test/results/clientpositive/spark/auto_join30.q.out index 4c832e2..381e27a 100644 --- ql/src/test/results/clientpositive/spark/auto_join30.q.out +++ ql/src/test/results/clientpositive/spark/auto_join30.q.out @@ -15,12 +15,13 @@ ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark Edges: Reducer 5 <- Map 4 (SORT, 2) @@ -125,6 +126,88 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 11 <- Map 10 (SORT, 2) + Reducer 7 <- Map 6 (SORT, 2) + Reducer 8 <- Reducer 11 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 9 <- Reducer 8 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 10 + Map Operator Tree: + TableScan + alias: src + Filter Operator + predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + value expressions: _col0 (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: src + Filter Operator + predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reducer 11 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: string) + Reducer 7 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col2, _col3 + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Reducer 9 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -167,12 +250,13 @@ ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root 
stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark Edges: Reducer 5 <- Map 4 (SORT, 2) @@ -271,6 +355,84 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 11 <- Map 10 (SORT, 2) + Reducer 7 <- Map 6 (SORT, 2) + Reducer 8 <- Reducer 11 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 9 <- Reducer 8 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 10 + Map Operator Tree: + TableScan + alias: src + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + value expressions: _col0 (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: src + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reducer 11 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: string) + Reducer 7 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + outputColumnNames: _col2, _col3 + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Reducer 9 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -313,12 +475,13 @@ ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark Edges: Reducer 2 <- Map 1 (SORT, 2) @@ -417,6 +580,84 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 11 <- Map 10 (SORT, 2) + Reducer 7 <- Map 6 (SORT, 2) + Reducer 8 <- Reducer 11 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 9 <- Reducer 8 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 10 + Map Operator Tree: + TableScan + alias: src + Select Operator + expressions: key 
(type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + value expressions: _col0 (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: src + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reducer 11 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: string) + Reducer 7 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + outputColumnNames: _col2, _col3 + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Reducer 9 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -465,12 +706,13 @@ ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark Edges: Reducer 5 <- Map 4 (SORT, 2) @@ -610,6 +852,112 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 13 <- Map 12 (SORT, 2) + Reducer 15 <- Map 14 (SORT, 2) + Reducer 9 <- Map 8 (SORT, 2) + Reducer 10 <- Reducer 13 (PARTITION-LEVEL SORT, 2), Reducer 15 (PARTITION-LEVEL SORT, 2), Reducer 9 (PARTITION-LEVEL SORT, 2) + Reducer 11 <- Reducer 10 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 12 + Map Operator Tree: + TableScan + alias: src + Filter Operator + predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + value expressions: _col0 (type: string) + Map 14 + Map Operator Tree: + TableScan + alias: src + Filter Operator + predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + value expressions: _col0 (type: string) + Map 8 + Map Operator Tree: + TableScan + alias: src + Filter Operator + predicate: 
key is not null (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reducer 10 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + outputColumnNames: _col2, _col3 + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Reducer 11 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 13 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: string) + Reducer 15 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reducer 9 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Stage: Stage-0 Fetch Operator limit: -1 @@ -664,12 +1012,13 @@ ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark Edges: Reducer 5 <- Map 4 (SORT, 2) @@ -800,6 +1149,106 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 13 <- Map 12 (SORT, 2) + Reducer 15 <- Map 14 (SORT, 2) + Reducer 9 <- Map 8 (SORT, 2) + Reducer 10 <- Reducer 13 (PARTITION-LEVEL SORT, 2), Reducer 15 (PARTITION-LEVEL SORT, 2), Reducer 9 (PARTITION-LEVEL SORT, 2) + Reducer 11 <- Reducer 10 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 12 + Map Operator Tree: + TableScan + alias: src + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + value expressions: _col0 (type: string) + Map 14 + Map Operator Tree: + TableScan + alias: src + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + value expressions: _col0 (type: string) + Map 8 + Map Operator Tree: + TableScan + alias: src + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Reduce Output 
Operator + key expressions: _col0 (type: string) + sort order: + + Reducer 10 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Left Outer Join0 to 2 + outputColumnNames: _col2, _col3 + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Reducer 11 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 13 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: string) + Reducer 15 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reducer 9 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Stage: Stage-0 Fetch Operator limit: -1 @@ -854,12 +1303,13 @@ ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark Edges: Reducer 5 <- Map 4 (SORT, 2) @@ -990,6 +1440,106 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 13 <- Map 12 (SORT, 2) + Reducer 15 <- Map 14 (SORT, 2) + Reducer 9 <- Map 8 (SORT, 2) + Reducer 10 <- Reducer 13 (PARTITION-LEVEL SORT, 2), Reducer 15 (PARTITION-LEVEL SORT, 2), Reducer 9 (PARTITION-LEVEL SORT, 2) + Reducer 11 <- Reducer 10 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 12 + Map Operator Tree: + TableScan + alias: src + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + value expressions: _col0 (type: string) + Map 14 + Map Operator Tree: + TableScan + alias: src + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + value expressions: _col0 (type: string) + Map 8 + Map Operator Tree: + TableScan + alias: src + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reducer 10 + Reduce Operator Tree: + Join Operator + 
condition map: + Left Outer Join0 to 1 + Left Outer Join0 to 2 + outputColumnNames: _col2, _col3 + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Reducer 11 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 13 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: string) + Reducer 15 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reducer 9 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Stage: Stage-0 Fetch Operator limit: -1 @@ -1044,12 +1594,13 @@ ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark Edges: Reducer 2 <- Map 1 (SORT, 2) @@ -1180,6 +1731,106 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 13 <- Map 12 (SORT, 2) + Reducer 15 <- Map 14 (SORT, 2) + Reducer 9 <- Map 8 (SORT, 2) + Reducer 10 <- Reducer 13 (PARTITION-LEVEL SORT, 2), Reducer 15 (PARTITION-LEVEL SORT, 2), Reducer 9 (PARTITION-LEVEL SORT, 2) + Reducer 11 <- Reducer 10 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 12 + Map Operator Tree: + TableScan + alias: src + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + value expressions: _col0 (type: string) + Map 14 + Map Operator Tree: + TableScan + alias: src + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + value expressions: _col0 (type: string) + Map 8 + Map Operator Tree: + TableScan + alias: src + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reducer 10 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Right Outer Join0 to 2 + outputColumnNames: _col2, _col3 + Group By Operator 
+ aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Reducer 11 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 13 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: string) + Reducer 15 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reducer 9 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Stage: Stage-0 Fetch Operator limit: -1 @@ -1234,12 +1885,13 @@ ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark Edges: Reducer 2 <- Map 1 (SORT, 2) @@ -1370,6 +2022,106 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 13 <- Map 12 (SORT, 2) + Reducer 15 <- Map 14 (SORT, 2) + Reducer 9 <- Map 8 (SORT, 2) + Reducer 10 <- Reducer 13 (PARTITION-LEVEL SORT, 2), Reducer 15 (PARTITION-LEVEL SORT, 2), Reducer 9 (PARTITION-LEVEL SORT, 2) + Reducer 11 <- Reducer 10 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 12 + Map Operator Tree: + TableScan + alias: src + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + value expressions: _col0 (type: string) + Map 14 + Map Operator Tree: + TableScan + alias: src + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + value expressions: _col0 (type: string) + Map 8 + Map Operator Tree: + TableScan + alias: src + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reducer 10 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Right Outer Join0 to 2 + outputColumnNames: _col2, _col3 + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort 
order: + value expressions: _col0 (type: bigint) + Reducer 11 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 13 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: string) + Reducer 15 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reducer 9 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/auto_join31.q.out ql/src/test/results/clientpositive/spark/auto_join31.q.out index 5980814..151447d 100644 --- ql/src/test/results/clientpositive/spark/auto_join31.q.out +++ ql/src/test/results/clientpositive/spark/auto_join31.q.out @@ -21,12 +21,13 @@ ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark Edges: Reducer 2 <- Map 1 (SORT, 2) @@ -157,6 +158,106 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 13 <- Map 12 (SORT, 2) + Reducer 15 <- Map 14 (SORT, 2) + Reducer 9 <- Map 8 (SORT, 2) + Reducer 10 <- Reducer 13 (PARTITION-LEVEL SORT, 2), Reducer 15 (PARTITION-LEVEL SORT, 2), Reducer 9 (PARTITION-LEVEL SORT, 2) + Reducer 11 <- Reducer 10 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 12 + Map Operator Tree: + TableScan + alias: src + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + value expressions: _col0 (type: string) + Map 14 + Map Operator Tree: + TableScan + alias: src + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + value expressions: _col0 (type: string) + Map 8 + Map Operator Tree: + TableScan + alias: src + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reducer 10 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Inner Join 0 to 2 + 
outputColumnNames: _col2, _col3 + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Reducer 11 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 13 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: string) + Reducer 15 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reducer 9 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/auto_join32.q.out ql/src/test/results/clientpositive/spark/auto_join32.q.out index 9629f53..a833d38 100644 --- ql/src/test/results/clientpositive/spark/auto_join32.q.out +++ ql/src/test/results/clientpositive/spark/auto_join32.q.out @@ -27,12 +27,13 @@ on (s.name = v.name) group by s.name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -108,6 +109,66 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (GROUP PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: s + Filter Operator + predicate: name is not null (type: boolean) + Reduce Output Operator + key expressions: name (type: string) + sort order: + + Map-reduce partition columns: name (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: v + Filter Operator + predicate: name is not null (type: boolean) + Reduce Output Operator + key expressions: name (type: string) + sort order: + + Map-reduce partition columns: name (type: string) + value expressions: registration (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col8 + Group By Operator + aggregations: count(DISTINCT _col8) + keys: _col0 (type: string), _col8 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: 
_col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Reducer 6 + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/auto_join4.q.out ql/src/test/results/clientpositive/spark/auto_join4.q.out index 3366f75..1f4bf19 100644 --- ql/src/test/results/clientpositive/spark/auto_join4.q.out +++ ql/src/test/results/clientpositive/spark/auto_join4.q.out @@ -37,13 +37,14 @@ FROM ( INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-4 is a root stage has a backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -120,6 +121,57 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Spark + Edges: + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: src1 + Filter Operator + predicate: ((key > 10) and (key < 20)) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: src2 + Filter Operator + predicate: ((key > 15) and (key < 25)) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + PREHOOK: query: FROM ( FROM ( diff --git ql/src/test/results/clientpositive/spark/auto_join5.q.out ql/src/test/results/clientpositive/spark/auto_join5.q.out index b6d8798..a831796 100644 --- ql/src/test/results/clientpositive/spark/auto_join5.q.out +++ ql/src/test/results/clientpositive/spark/auto_join5.q.out @@ -37,13 +37,14 @@ FROM ( INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4 POSTHOOK: type: QUERY 
STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-4 is a root stage has a backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -120,6 +121,57 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Spark + Edges: + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: src1 + Filter Operator + predicate: ((key > 10) and (key < 20)) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: src2 + Filter Operator + predicate: ((key > 15) and (key < 25)) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + PREHOOK: query: FROM ( FROM ( diff --git ql/src/test/results/clientpositive/spark/auto_join8.q.out ql/src/test/results/clientpositive/spark/auto_join8.q.out index 5b6cc80..dfee13d 100644 --- ql/src/test/results/clientpositive/spark/auto_join8.q.out +++ ql/src/test/results/clientpositive/spark/auto_join8.q.out @@ -37,13 +37,14 @@ FROM ( INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4 where c.c3 IS NULL AND c.c1 IS NOT NULL POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-4 is a root stage has a backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -123,6 +124,59 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Spark + Edges: + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: src1 + Filter Operator + predicate: (((key > 10) and (key < 20)) and key is not null) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: string) + 
Map 5 + Map Operator Tree: + TableScan + alias: src2 + Filter Operator + predicate: (((key > 15) and (key < 25)) and key is not null) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + outputColumnNames: _col0, _col1, _col2, _col3 + Filter Operator + predicate: _col2 is null (type: boolean) + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(null) (type: int), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + PREHOOK: query: FROM ( FROM ( diff --git ql/src/test/results/clientpositive/spark/auto_join9.q.out ql/src/test/results/clientpositive/spark/auto_join9.q.out index 6daf348..bfecc1c 100644 --- ql/src/test/results/clientpositive/spark/auto_join9.q.out +++ ql/src/test/results/clientpositive/spark/auto_join9.q.out @@ -15,13 +15,14 @@ FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where src1.ds = '2008-04-08' and src1.hr = '12' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-4 is a root stage has a backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -90,19 +91,69 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Spark + Edges: + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: src1 + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: src2 + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col8 + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col8 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where src1.ds = '2008-04-08' and src1.hr = '12' PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Input: default@srcpart +PREHOOK: Input: 
default@srcpart@ds=2008-04-08/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Output: default@dest1 POSTHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where src1.ds = '2008-04-08' and src1.hr = '12' POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Output: default@dest1 POSTHOOK: Lineage: dest1.key EXPRESSION [(srcpart)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] diff --git ql/src/test/results/clientpositive/spark/auto_join_filters.q.out ql/src/test/results/clientpositive/spark/auto_join_filters.q.out index 8934433..8262340 100644 --- ql/src/test/results/clientpositive/spark/auto_join_filters.q.out +++ ql/src/test/results/clientpositive/spark/auto_join_filters.q.out @@ -14,7 +14,7 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in3.txt' INTO TABLE my POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@myinput1 -Warning: Map Join MAPJOIN[19][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[31][bigTable=?] in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -24,7 +24,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 3078400 -Warning: Map Join MAPJOIN[15][bigTable=a] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[26][bigTable=a] in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -34,7 +34,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 4937935 -Warning: Map Join MAPJOIN[15][bigTable=b] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[26][bigTable=b] in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -300,7 +300,7 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in2.txt' into table sm POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_input2 -Warning: Map Join MAPJOIN[19][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[31][bigTable=?] 
in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -310,7 +310,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 3078400 -Warning: Map Join MAPJOIN[19][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[31][bigTable=?] in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -320,7 +320,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 3078400 -Warning: Map Join MAPJOIN[19][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[31][bigTable=?] in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 diff --git ql/src/test/results/clientpositive/spark/auto_join_nulls.q.out ql/src/test/results/clientpositive/spark/auto_join_nulls.q.out index 1f37c75..8f9af3f 100644 --- ql/src/test/results/clientpositive/spark/auto_join_nulls.q.out +++ ql/src/test/results/clientpositive/spark/auto_join_nulls.q.out @@ -14,7 +14,7 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/in1.txt' INTO TABLE my POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@myinput1 -Warning: Map Join MAPJOIN[11][bigTable=a] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[21][bigTable=a] in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -24,7 +24,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 13630578 -Warning: Map Join MAPJOIN[11][bigTable=a] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[21][bigTable=a] in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -34,7 +34,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 13630578 -Warning: Map Join MAPJOIN[11][bigTable=b] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[21][bigTable=b] in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 diff --git ql/src/test/results/clientpositive/spark/auto_join_stats.q.out ql/src/test/results/clientpositive/spark/auto_join_stats.q.out index 1fa1a74..1719c23 100644 --- ql/src/test/results/clientpositive/spark/auto_join_stats.q.out +++ ql/src/test/results/clientpositive/spark/auto_join_stats.q.out @@ -33,12 +33,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN 
src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -126,6 +127,73 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 8 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Map 9 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 5 + Map Operator Tree: + TableScan + alias: src1 + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Map 8 + Map Operator Tree: + TableScan + alias: src2 + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Map 9 + Map Operator Tree: + TableScan + alias: smalltable + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + value expressions: key (type: string) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col5 + Filter Operator + predicate: (_col0 + _col5) is not null (type: boolean) + Reduce Output Operator + key expressions: (_col0 + _col5) (type: double) + sort order: + + Map-reduce partition columns: (_col0 + _col5) (type: double) + value expressions: _col0 (type: string), _col5 (type: string) + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col5, _col10 + Select Operator + expressions: _col0 (type: string), _col5 (type: string), _col10 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -173,12 +241,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -293,6 +362,97 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 7 <- Map 10 
(PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2) + Reducer 8 <- Map 11 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 9 <- Map 12 (PARTITION-LEVEL SORT, 2), Reducer 8 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 10 + Map Operator Tree: + TableScan + alias: src2 + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Map 11 + Map Operator Tree: + TableScan + alias: smalltable + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + value expressions: key (type: string) + Map 12 + Map Operator Tree: + TableScan + alias: smalltable2 + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Map 6 + Map Operator Tree: + TableScan + alias: src1 + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col5 + Filter Operator + predicate: (_col0 + _col5) is not null (type: boolean) + Reduce Output Operator + key expressions: (_col0 + _col5) (type: double) + sort order: + + Map-reduce partition columns: (_col0 + _col5) (type: double) + value expressions: _col0 (type: string), _col5 (type: string) + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col5, _col10 + Filter Operator + predicate: (_col0 + _col5) is not null (type: boolean) + Reduce Output Operator + key expressions: (_col0 + _col5) (type: double) + sort order: + + Map-reduce partition columns: (_col0 + _col5) (type: double) + value expressions: _col0 (type: string), _col5 (type: string), _col10 (type: string) + Reducer 9 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col5, _col10 + Select Operator + expressions: _col0 (type: string), _col5 (type: string), _col10 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/auto_join_stats2.q.out ql/src/test/results/clientpositive/spark/auto_join_stats2.q.out index c6473d3..bb3e988 100644 --- ql/src/test/results/clientpositive/spark/auto_join_stats2.q.out +++ ql/src/test/results/clientpositive/spark/auto_join_stats2.q.out @@ -25,12 +25,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a 
backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -112,6 +113,73 @@ STAGE PLANS: Local Work: Map Reduce Local Work + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 8 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: src1 + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: src2 + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Map 8 + Map Operator Tree: + TableScan + alias: smalltable + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + value expressions: key (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col5 + Filter Operator + predicate: (_col0 + _col5) is not null (type: boolean) + Reduce Output Operator + key expressions: (_col0 + _col5) (type: double) + sort order: + + Map-reduce partition columns: (_col0 + _col5) (type: double) + value expressions: _col0 (type: string), _col5 (type: string) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col5, _col10 + Select Operator + expressions: _col0 (type: string), _col5 (type: string), _col10 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -159,12 +227,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select src1.key, src2.key, smalltable.key from src src1 JOIN src src2 ON (src1.key = src2.key) JOIN smalltable ON (src1.key + src2.key = smalltable.key) JOIN smalltable2 ON (src1.key + src2.key = smalltable2.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -273,6 +342,97 @@ STAGE PLANS: Local Work: Map Reduce Local Work + Stage: Stage-2 + Spark + Edges: + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 9 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Map 10 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 8 <- Map 11 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 10 + Map Operator Tree: + TableScan + alias: smalltable + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort 
order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + value expressions: key (type: string) + Map 11 + Map Operator Tree: + TableScan + alias: smalltable2 + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Map 5 + Map Operator Tree: + TableScan + alias: src1 + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Map 9 + Map Operator Tree: + TableScan + alias: src2 + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col5 + Filter Operator + predicate: (_col0 + _col5) is not null (type: boolean) + Reduce Output Operator + key expressions: (_col0 + _col5) (type: double) + sort order: + + Map-reduce partition columns: (_col0 + _col5) (type: double) + value expressions: _col0 (type: string), _col5 (type: string) + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col5, _col10 + Filter Operator + predicate: (_col0 + _col5) is not null (type: boolean) + Reduce Output Operator + key expressions: (_col0 + _col5) (type: double) + sort order: + + Map-reduce partition columns: (_col0 + _col5) (type: double) + value expressions: _col0 (type: string), _col5 (type: string), _col10 (type: string) + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col5, _col10 + Select Operator + expressions: _col0 (type: string), _col5 (type: string), _col10 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_1.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_1.q.out index 3d465db..44af4d6 100644 --- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_1.q.out +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_1.q.out @@ -612,12 +612,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -878,6 +879,239 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce 
partition columns: key (type: string) + tag: 0 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [a] + /bucket_big/ds=2008-04-09 [a] + Map 7 + Map Operator Tree: + TableScan + alias: b + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition 
columns: key (type: string) + tag: 1 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 114 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [b] + Reducer 5 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 6 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out index fe7b96d..90cd6c5 100644 --- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_10.q.out @@ -214,12 +214,13 @@ select count(*) from on subq1.key = subq2.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on 
stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark Edges: Reducer 2 <- Map 1 (GROUP, 1) @@ -322,6 +323,86 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 6 <- Map 5 (GROUP, 1) + Reducer 7 <- Map 9 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) + Reducer 8 <- Reducer 7 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 5 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: (key < 6) (type: boolean) + Group By Operator + aggregations: count() + bucketGroup: true + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) + Map 9 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: (key < 6) (type: boolean) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reducer 6 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Reducer 8 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_11.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_11.q.out index f4e889a..6949429 100644 --- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_11.q.out +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_11.q.out @@ -138,12 +138,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -390,6 +391,236 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + GatherStats: false + Filter Operator + isSamplingPred: false + 
predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + tag: 0 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 114 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [a] + Map 7 + Map Operator Tree: + TableScan + alias: b + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + tag: 1 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] + Reducer 5 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 6 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Stage: Stage-0 Fetch Operator limit: -1 @@ -458,12 +689,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -721,6 +953,236 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL 
SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + tag: 0 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 114 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [a] + Map 7 + Map Operator Tree: + TableScan + alias: b + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + tag: 1 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types 
string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [b] + /bucket_big/ds=2008-04-09 [b] + Reducer 5 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 6 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out index c358721..19f0015 100644 --- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out +++ 
ql/src/test/results/clientpositive/spark/auto_sortmerge_join_12.q.out @@ -138,7 +138,7 @@ POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket3out POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_medium@ds=2008-04-08 -Warning: Map Join MAPJOIN[26][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[47][bigTable=?] in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key @@ -204,13 +204,14 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-2 + Stage-4 is a root stage has a backup stage: Stage-2 + Stage-3 depends on stages: Stage-4 has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -285,7 +286,7 @@ STAGE PLANS: Truncated Path -> Alias: /bucket_small/ds=2008-04-08 [a] - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -622,13 +623,398 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false + Stage: Stage-2 + Spark + Edges: + Reducer 7 <- Map 11 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2) + Reducer 8 <- Map 12 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 9 <- Map 13 (PARTITION-LEVEL SORT, 1), Reducer 8 (PARTITION-LEVEL SORT, 1) + Reducer 10 <- Reducer 9 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 11 + Map Operator Tree: + TableScan + alias: b + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + tag: 1 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 3 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_medium + numFiles 3 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_medium { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 170 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_medium + partition_columns ds + 
partition_columns.types string + serialization.ddl struct bucket_medium { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_medium + name: default.bucket_medium + Truncated Path -> Alias: + /bucket_medium/ds=2008-04-08 [b] + Map 12 + Map Operator Tree: + TableScan + alias: c + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string), key (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), key (type: string) + tag: 1 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + 
partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [c] + /bucket_big/ds=2008-04-09 [c] + Map 13 + Map Operator Tree: + TableScan + alias: d + GatherStats: false + Reduce Output Operator + sort order: + tag: 1 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 3 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_medium + numFiles 3 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_medium { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 170 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 3 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_medium + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_medium { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_medium + name: default.bucket_medium + Truncated Path -> Alias: + /bucket_medium/ds=2008-04-08 [d] + Map 6 + Map Operator Tree: + TableScan + alias: a + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + tag: 0 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 114 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat 
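
The new "has a backup stage" annotations and the extra Stage-2 plans appearing throughout these golden files reflect Hive's map-join fallback: if the stage holding the converted map join (here Stage-3) fails at runtime — typically because the small table's hash side does not fit in memory — execution falls back to the backup stage, which carries the equivalent shuffle-based common join. Below is a minimal, self-contained sketch of that control flow; the Stage class and runStage method are hypothetical illustrations of the idea, not Hive's actual task API.

    import java.util.Arrays;
    import java.util.List;

    public class BackupStageSketch {

        /** A query stage that may carry a backup stage to run if it fails. */
        static final class Stage {
            final String name;
            final Stage backup;           // e.g. Stage-3 (map join) backs onto Stage-2 (common join)
            final boolean failsAtRuntime; // e.g. the small table does not fit in memory

            Stage(String name, Stage backup, boolean failsAtRuntime) {
                this.name = name;
                this.backup = backup;
                this.failsAtRuntime = failsAtRuntime;
            }
        }

        /** Run a stage; on failure, fall back to its backup stage if one exists. */
        static boolean runStage(Stage s) {
            System.out.println("launching " + s.name);
            if (!s.failsAtRuntime) {
                return true;
            }
            if (s.backup != null) {
                System.out.println(s.name + " failed, falling back to " + s.backup.name);
                return runStage(s.backup);
            }
            return false;
        }

        public static void main(String[] args) {
            // Mirrors the shape of the updated STAGE DEPENDENCIES:
            // Stage-3 (map join) has a backup stage, Stage-2 (shuffle join).
            Stage commonJoin = new Stage("Stage-2 (common join)", null, false);
            Stage mapJoin = new Stage("Stage-3 (map join)", commonJoin, true);
            List<Stage> plan = Arrays.asList(mapJoin);
            for (Stage s : plan) {
                System.out.println("query " + (runStage(s) ? "succeeded" : "failed"));
            }
        }
    }

This is also why each backup Stage-2 in these outputs is a full reduce-side pipeline — PARTITION-LEVEL SORT edges feeding a Join Operator, then the GROUP/count reducers — duplicating the work the map-join stage would otherwise do in memory.
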
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [a] + Reducer 10 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 7 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col6 + Reduce Output Operator + key expressions: _col6 (type: string), _col6 (type: string) + sort order: ++ + Map-reduce partition columns: _col6 (type: string), _col6 (type: string) + tag: 0 + auto parallelism: false + Reducer 8 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Reduce Output Operator + sort order: + tag: 0 + auto parallelism: false + Reducer 9 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: ListSink -Warning: Map Join MAPJOIN[26][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[47][bigTable=?] 
in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key PREHOOK: type: QUERY PREHOOK: Input: default@bucket_big diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_13.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_13.q.out index 7eadcd0..cdf1891 100644 --- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_13.q.out +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_13.q.out @@ -447,15 +447,16 @@ INSERT OVERWRITE TABLE dest1 select key1, key2 INSERT OVERWRITE TABLE dest2 select value1, value2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 + Stage-6 is a root stage has a backup stage: Stage-5 + Stage-2 depends on stages: Stage-6 has a backup stage: Stage-5 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 + Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Spark #### A masked pattern was here #### Vertices: @@ -553,6 +554,64 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-5 + Spark + Edges: + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + value expressions: value (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: b + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + value expressions: value (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: _col0 (type: int), _col2 (type: int) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Select Operator + expressions: _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + PREHOOK: query: from ( SELECT a.key key1, a.value value1, b.key key2, b.value value2 FROM tbl1 a JOIN tbl2 b diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_14.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_14.q.out index 984db20..14ad3a1 100644 --- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_14.q.out +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_14.q.out @@ -43,12 +43,13 @@ explain select count(*) FROM tbl1 a LEFT OUTER JOIN tbl2 b ON a.key 
= b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -114,6 +115,57 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) + Reducer 6 <- Reducer 5 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Map 7 + Map Operator Tree: + TableScan + alias: b + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Reducer 6 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -150,12 +202,13 @@ explain select count(*) FROM tbl1 a RIGHT OUTER JOIN tbl2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -221,6 +274,57 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) + Reducer 6 <- Reducer 5 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Map 7 + Map Operator Tree: + TableScan + alias: b + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Reducer 6 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + 
compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_15.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_15.q.out index 2acc323..e4099ff 100644 --- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_15.q.out +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_15.q.out @@ -41,12 +41,13 @@ POSTHOOK: query: explain select count(*) FROM tbl1 a LEFT OUTER JOIN tbl2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -112,6 +113,57 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) + Reducer 6 <- Reducer 5 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Map 7 + Map Operator Tree: + TableScan + alias: b + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Reducer 6 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -125,12 +177,13 @@ POSTHOOK: query: explain select count(*) FROM tbl1 a RIGHT OUTER JOIN tbl2 b ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -196,6 +249,57 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) + Reducer 6 <- Reducer 5 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition 
columns: key (type: int) + Map 7 + Map Operator Tree: + TableScan + alias: b + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Reducer 6 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_2.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_2.q.out index f05b0cc..5e3c632 100644 --- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_2.q.out +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_2.q.out @@ -359,12 +359,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -625,6 +626,239 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + tag: 0 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types 
string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [a] + /bucket_big/ds=2008-04-09 [a] + Map 7 + Map Operator Tree: + TableScan + alias: b + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + tag: 1 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 226 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string 
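
Note how the backup stage is listed in STAGE DEPENDENCIES: "Stage-2" appears bare, with neither "is a root stage" nor a "depends on stages:" clause, because it is not scheduled through the dependency graph at all — it runs only when its owner falls back to it. The sketch below renders dependency lines of this shape; the Stage fields and render method are illustrative assumptions, not Hive's EXPLAIN implementation.

    import java.util.Arrays;
    import java.util.Collections;
    import java.util.List;

    public class StageDependencyPrinter {

        static final class Stage {
            final String name;
            final List<String> dependsOn;
            final String backup;      // name of the fallback stage, or null
            final boolean backupOnly; // true if reachable only as someone's backup

            Stage(String name, List<String> dependsOn, String backup, boolean backupOnly) {
                this.name = name;
                this.dependsOn = dependsOn;
                this.backup = backup;
                this.backupOnly = backupOnly;
            }
        }

        static String render(Stage s) {
            if (s.backupOnly) {
                // Backup-only stages are listed bare, exactly like "Stage-2" in these diffs.
                return "  " + s.name;
            }
            StringBuilder line = new StringBuilder("  " + s.name);
            line.append(s.dependsOn.isEmpty()
                ? " is a root stage"
                : " depends on stages: " + String.join(", ", s.dependsOn));
            if (s.backup != null) {
                line.append(" has a backup stage: ").append(s.backup);
            }
            return line.toString();
        }

        public static void main(String[] args) {
            // Reproduces the post-patch dependency block for a two-stage map-join query.
            List<Stage> stages = Arrays.asList(
                new Stage("Stage-3", Collections.<String>emptyList(), "Stage-2", false),
                new Stage("Stage-1", Arrays.asList("Stage-3"), "Stage-2", false),
                new Stage("Stage-2", Collections.<String>emptyList(), null, true),
                new Stage("Stage-0", Arrays.asList("Stage-1"), null, false));
            System.out.println("STAGE DEPENDENCIES:");
            for (Stage s : stages) {
                System.out.println(render(s));
            }
        }
    }

Running this prints the same four-line dependency block these golden files now expect, which is a quick way to sanity-check the renumbering (the old root Stage-2 becomes Stage-3, and the freed Stage-2 slot holds the backup plan).
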
+ serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [b] + Reducer 5 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 6 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_3.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_3.q.out index c7d23f8..579840f 100644 --- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_3.q.out +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_3.q.out @@ -494,12 +494,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -760,6 +761,239 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + tag: 0 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct 
bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [a] + Map 7 + Map Operator Tree: + TableScan + alias: b + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + tag: 1 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 114 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, 
string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 114 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [b] + /bucket_small/ds=2008-04-09 [b] + Reducer 5 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 6 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out index f5dc2f7..7d6b7d9 100644 --- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_4.q.out @@ -510,12 +510,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -776,6 +777,239 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + tag: 
0 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [a] + Map 7 + Map Operator Tree: + TableScan + alias: b + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + tag: 1 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 226 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 226 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [b] + /bucket_small/ds=2008-04-09 [b] + Reducer 5 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 6 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_5.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_5.q.out index 26e7957..f6865f8 100644 --- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_5.q.out +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_5.q.out @@ -455,12 +455,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 
depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -664,6 +665,183 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + tag: 0 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 2 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 2 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big [a] + Map 7 + Map Operator Tree: + TableScan + alias: b + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + tag: 1 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 4 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 226 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + 
COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 4 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 226 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small [b] + Reducer 5 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 6 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out index a5c0562..aa211a7 100644 --- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_7.q.out @@ -629,12 +629,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -944,6 +945,287 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + tag: 0 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns 
key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2750 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [a] + /bucket_big/ds=2008-04-09 [a] + Map 7 + Map Operator Tree: + TableScan + alias: b + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + tag: 1 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + 
columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 226 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 226 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [b] + /bucket_small/ds=2008-04-09 [b] + Reducer 5 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 6 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was 
here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_8.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_8.q.out index ef13a40..16aaa9d 100644 --- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_8.q.out +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_8.q.out @@ -631,12 +631,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -946,6 +947,287 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + tag: 0 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output 
format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_big + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_big { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_big + name: default.bucket_big + Truncated Path -> Alias: + /bucket_big/ds=2008-04-08 [a] + /bucket_big/ds=2008-04-09 [a] + Map 7 + Map Operator Tree: + TableScan + alias: b + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + tag: 1 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 114 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output 
format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 114 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types string:string +#### A masked pattern was here #### + name default.bucket_small + partition_columns ds + partition_columns.types string + serialization.ddl struct bucket_small { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucket_small + name: default.bucket_small + Truncated Path -> Alias: + /bucket_small/ds=2008-04-08 [b] + /bucket_small/ds=2008-04-09 [b] + Reducer 5 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Reducer 6 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out index a2b98fc..37164d5 100644 --- ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out +++ ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out @@ -404,12 +404,13 @@ join on src1.key = src2.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark Edges: Reducer 4 <- Map 3 (GROUP, 1) @@ -524,6 +525,128 @@ STAGE PLANS: output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 11 <- Map 10 (PARTITION-LEVEL SORT, 1), Map 13 (PARTITION-LEVEL SORT, 1) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 9 (PARTITION-LEVEL SORT, 1) + Reducer 12 <- Reducer 11 (GROUP, 1) + Reducer 7 <- Reducer 6 (GROUP, 1) + Reducer 8 <- Reducer 12 (PARTITION-LEVEL SORT, 1), Reducer 7 (PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 10 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Map 13 + Map Operator Tree: + TableScan + alias: b + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Map 5 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Map 9 + Map Operator Tree: + TableScan + alias: b + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Reducer 11 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0 + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) + Reducer 12 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0 + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) + Reducer 7 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col3 + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -1065,12 
+1188,13 @@ select count(*) from on subq1.key = subq2.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -1150,6 +1274,67 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) + Reducer 6 <- Reducer 5 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + Select Operator + expressions: (key + 1) (type: int) + outputColumnNames: _col0 + Filter Operator + predicate: _col0 is not null (type: boolean) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Map 7 + Map Operator Tree: + TableScan + alias: a + Select Operator + expressions: (key + 1) (type: int) + outputColumnNames: _col0 + Filter Operator + predicate: _col0 is not null (type: boolean) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Reducer 6 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -1614,12 +1799,13 @@ select count(*) from ( ) subq1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -1691,6 +1877,61 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) + Reducer 6 <- Reducer 5 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Map 7 + Map Operator Tree: + TableScan + alias: b + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner 
Join 0 to 1 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Reducer 6 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -1729,12 +1970,13 @@ select key, count(*) from group by key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -1811,6 +2053,66 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) + Reducer 6 <- Reducer 5 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Map 7 + Map Operator Tree: + TableScan + alias: b + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0 + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) + Reducer 6 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -1864,12 +2166,13 @@ select count(*) from ) subq2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -1965,6 +2268,81 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + 
Edges: + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 9 (PARTITION-LEVEL SORT, 1) + Reducer 7 <- Reducer 6 (GROUP, 1) + Reducer 8 <- Reducer 7 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 5 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Map 9 + Map Operator Tree: + TableScan + alias: b + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0 + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) + Reducer 7 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Reducer 8 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -2035,14 +2413,15 @@ join on src1.key = src2.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 + Stage-4 is a root stage has a backup stage: Stage-2 + Stage-3 depends on stages: Stage-4 has a backup stage: Stage-2 + Stage-5 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-5 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -2061,7 +2440,7 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-2 + Stage: Stage-3 Spark Edges: Reducer 5 <- Map 4 (GROUP, 1) @@ -2114,7 +2493,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) - Stage: Stage-4 + Stage: Stage-5 Spark #### A masked pattern was here #### Vertices: @@ -2203,28 +2582,150 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select src1.key, src1.cnt1, src2.cnt1 from -( - select key, count(*) as cnt1 from - ( - select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key - ) subq1 group by key -) src1 -join -( - select key, count(*) as cnt1 from - ( - select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key - ) subq2 group by key -) src2 -on src1.key 
= src2.key -PREHOOK: type: QUERY + Stage: Stage-2 + Spark + Edges: + Reducer 13 <- Map 12 (PARTITION-LEVEL SORT, 1), Map 15 (PARTITION-LEVEL SORT, 1) + Reducer 8 <- Map 11 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) + Reducer 14 <- Reducer 13 (GROUP, 1) + Reducer 9 <- Reducer 8 (GROUP, 1) + Reducer 10 <- Reducer 14 (PARTITION-LEVEL SORT, 1), Reducer 9 (PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 11 + Map Operator Tree: + TableScan + alias: b + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Map 12 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Map 15 + Map Operator Tree: + TableScan + alias: b + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Map 7 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Reducer 10 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col3 + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 13 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0 + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) + Reducer 14 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0 + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) + Reducer 9 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: bigint) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select src1.key, src1.cnt1, src2.cnt1 from 
+( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 group by key +) src1 +join +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq2 group by key +) src2 +on src1.key = src2.key +PREHOOK: type: QUERY PREHOOK: Input: default@tbl1 PREHOOK: Input: default@tbl2 #### A masked pattern was here #### @@ -2272,12 +2773,13 @@ select count(*) from on subq1.key = subq2.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -2357,6 +2859,67 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) + Reducer 6 <- Reducer 5 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: (key < 6) (type: boolean) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Map 7 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: (key < 6) (type: boolean) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Reducer 6 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -2411,12 +2974,13 @@ select count(*) from on subq2.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -2492,6 +3056,64 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) + Reducer 6 <- Reducer 5 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + Filter Operator + 
predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Map 7 + Map Operator Tree: + TableScan + alias: b + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Reducer 6 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -2570,12 +3192,13 @@ select count(*) from on subq2.key = subq4.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -2655,6 +3278,67 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) + Reducer 6 <- Reducer 5 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Map 7 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Reducer 6 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ 
-2723,12 +3407,13 @@ select count(*) from on subq1.key = subq2.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -2808,6 +3493,67 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) + Reducer 6 <- Reducer 5 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: (key < 8) (type: boolean) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Map 7 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: (key < 8) (type: boolean) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Reducer 6 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -2848,12 +3594,13 @@ select count(*) from join tbl2 a on subq1.key = a.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -2929,6 +3676,64 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) + Reducer 6 <- Reducer 5 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: (key < 6) (type: boolean) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Map 7 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) 
+ Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Reducer 6 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -2967,12 +3772,13 @@ select count(*) from tbl1 a on a.key = subq1.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -3048,6 +3854,64 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) + Reducer 6 <- Reducer 5 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: (key < 6) (type: boolean) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Map 7 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Reducer 6 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -3096,12 +3960,13 @@ select count(*) from on (subq1.key = subq3.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -3204,6 +4069,81 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 6 <- Map 5 
(PARTITION-LEVEL SORT, 1), Map 8 (PARTITION-LEVEL SORT, 1), Map 9 (PARTITION-LEVEL SORT, 1) + Reducer 7 <- Reducer 6 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 5 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: (key < 6) (type: boolean) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Map 8 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: (key < 6) (type: boolean) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Map 9 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: (key < 6) (type: boolean) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Reducer 7 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -3266,12 +4206,13 @@ join tbl2 b on subq2.key = b.key) a POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -3347,6 +4288,64 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 1), Map 7 (PARTITION-LEVEL SORT, 1) + Reducer 6 <- Reducer 5 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Map 7 + Map Operator Tree: + TableScan + alias: b + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Reducer 6 + 
Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/bucket_map_join_spark1.q.out ql/src/test/results/clientpositive/spark/bucket_map_join_spark1.q.out index 6230bef..51bd3e4 100644 --- ql/src/test/results/clientpositive/spark/bucket_map_join_spark1.q.out +++ ql/src/test/results/clientpositive/spark/bucket_map_join_spark1.q.out @@ -169,13 +169,14 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-4 is a root stage has a backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -396,6 +397,180 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Spark + Edges: + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: a + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + tag: 0 + value expressions: value (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part + partition_columns ds + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part + name: default.srcbucket_mapjoin_part + Truncated Path -> Alias: + /srcbucket_mapjoin_part/ds=2008-04-08 [a] 
+ Map 5 + Map Operator Tree: + TableScan + alias: b + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + tag: 1 + value expressions: value (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + partition_columns ds + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_2/ds=2008-04-08 [b] + Reducer 4 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col7 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b @@ -506,13 +681,14 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-4 is a root stage 
has a backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -732,6 +908,185 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Spark + Edges: + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: a + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + tag: 0 + value expressions: value (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part + partition_columns ds + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part + name: default.srcbucket_mapjoin_part + Truncated Path -> Alias: + /srcbucket_mapjoin_part/ds=2008-04-08 [a] + Map 5 + Map Operator Tree: + TableScan + alias: b + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + tag: 1 + value expressions: value (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + 
rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + partition_columns ds + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_2/ds=2008-04-08 [b] + Reducer 4 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col7 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + numFiles 4 + numRows 1028 + rawDataSize 19022 + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 20050 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b diff --git ql/src/test/results/clientpositive/spark/bucket_map_join_spark2.q.out ql/src/test/results/clientpositive/spark/bucket_map_join_spark2.q.out index 1a33625..f8df555 100644 --- ql/src/test/results/clientpositive/spark/bucket_map_join_spark2.q.out +++ ql/src/test/results/clientpositive/spark/bucket_map_join_spark2.q.out @@ -153,13 +153,14 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-4 is a root stage has a backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -380,6 +381,180 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Spark + Edges: + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 3 + Map 
Operator Tree: + TableScan + alias: a + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + tag: 0 + value expressions: value (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part + partition_columns ds + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part + name: default.srcbucket_mapjoin_part + Truncated Path -> Alias: + /srcbucket_mapjoin_part/ds=2008-04-08 [a] + Map 5 + Map Operator Tree: + TableScan + alias: b + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + tag: 1 + value expressions: value (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 3062 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + 
columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + partition_columns ds + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_2/ds=2008-04-08 [b] + Reducer 4 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col7 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b @@ -490,13 +665,14 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-4 is a root stage has a backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -727,6 +903,185 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Spark + Edges: + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: a + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + tag: 0 + value expressions: value (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + 
rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part + partition_columns ds + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part + name: default.srcbucket_mapjoin_part + Truncated Path -> Alias: + /srcbucket_mapjoin_part/ds=2008-04-08 [a] + Map 5 + Map Operator Tree: + TableScan + alias: b + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + tag: 1 + value expressions: value (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 3062 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + partition_columns ds + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_2/ds=2008-04-08 [b] + Reducer 4 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col7 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern 
was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + numFiles 4 + numRows 564 + rawDataSize 10503 + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 11067 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b diff --git ql/src/test/results/clientpositive/spark/bucket_map_join_spark3.q.out ql/src/test/results/clientpositive/spark/bucket_map_join_spark3.q.out index fed923c..27a1b5d 100644 --- ql/src/test/results/clientpositive/spark/bucket_map_join_spark3.q.out +++ ql/src/test/results/clientpositive/spark/bucket_map_join_spark3.q.out @@ -153,13 +153,14 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-4 is a root stage has a backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -380,6 +381,180 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Spark + Edges: + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: a + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + tag: 0 + value expressions: value (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 3062 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + 
columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part + partition_columns ds + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part + name: default.srcbucket_mapjoin_part + Truncated Path -> Alias: + /srcbucket_mapjoin_part/ds=2008-04-08 [a] + Map 5 + Map Operator Tree: + TableScan + alias: b + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + tag: 1 + value expressions: value (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + partition_columns ds + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_2/ds=2008-04-08 [b] + Reducer 4 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col7 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 
1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b @@ -490,13 +665,14 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-4 is a root stage has a backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -716,6 +892,185 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Spark + Edges: + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: a + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + tag: 0 + value expressions: value (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part + numFiles 2 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 3062 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part + partition_columns ds + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part + name: default.srcbucket_mapjoin_part + Truncated Path -> Alias: + /srcbucket_mapjoin_part/ds=2008-04-08 [a] + Map 5 + Map Operator Tree: + TableScan + alias: b + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + tag: 1 + value expressions: value (type: string) + auto parallelism: false + Path -> Alias: +#### A 
masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + numFiles 4 + numRows 0 + partition_columns ds + partition_columns.types string + rawDataSize 0 + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 4 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.srcbucket_mapjoin_part_2 + partition_columns ds + partition_columns.types string + serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcbucket_mapjoin_part_2 + name: default.srcbucket_mapjoin_part_2 + Truncated Path -> Alias: + /srcbucket_mapjoin_part_2/ds=2008-04-08 [b] + Reducer 4 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col7 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value1,value2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.bucketmapjoin_tmp_result + numFiles 4 + numRows 564 + rawDataSize 10503 + serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 11067 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.bucketmapjoin_tmp_result + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b diff --git ql/src/test/results/clientpositive/spark/bucket_map_join_spark4.q.out ql/src/test/results/clientpositive/spark/bucket_map_join_spark4.q.out index 8b5e8d4..64c6220 100644 --- ql/src/test/results/clientpositive/spark/bucket_map_join_spark4.q.out +++ ql/src/test/results/clientpositive/spark/bucket_map_join_spark4.q.out @@ -138,12 +138,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on 
stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -416,6 +417,258 @@ STAGE PLANS: Truncated Path -> Alias: /tbl2 [b] + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 8 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key is not null and value is not null) (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + tag: 0 + value expressions: value (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.tbl1 + numFiles 2 + numRows 10 + rawDataSize 70 + serialization.ddl struct tbl1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 80 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.tbl1 + numFiles 2 + numRows 10 + rawDataSize 70 + serialization.ddl struct tbl1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 80 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tbl1 + name: default.tbl1 + Truncated Path -> Alias: + /tbl1 [a] + Map 7 + Map Operator Tree: + TableScan + alias: b + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + tag: 1 + value expressions: value (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.tbl2 + numFiles 2 + numRows 10 + rawDataSize 70 + serialization.ddl struct tbl2 { i32 key, string value} + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 80 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.tbl2 + numFiles 2 + numRows 10 + rawDataSize 70 + serialization.ddl struct tbl2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 80 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tbl2 + name: default.tbl2 + Truncated Path -> Alias: + /tbl2 [b] + Map 8 + Map Operator Tree: + TableScan + alias: c + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: value is not null (type: boolean) + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + tag: 1 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.tbl3 + numFiles 2 + numRows 10 + rawDataSize 70 + serialization.ddl struct tbl3 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 80 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.tbl3 + numFiles 2 + numRows 10 + rawDataSize 70 + serialization.ddl struct tbl3 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 80 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tbl3 + name: default.tbl3 + Truncated Path -> Alias: + /tbl3 [c] + Reducer 5 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col6 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + tag: 0 + value expressions: _col0 (type: int), _col6 (type: string) + auto parallelism: false + Reducer 6 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col6, _col11 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + 
compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types int:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Stage: Stage-0 Fetch Operator limit: -1 @@ -570,12 +823,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -837,6 +1091,258 @@ STAGE PLANS: Truncated Path -> Alias: /tbl2 [b] + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 8 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key is not null and value is not null) (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + tag: 0 + value expressions: value (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.tbl1 + numFiles 2 + numRows 10 + rawDataSize 70 + serialization.ddl struct tbl1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 80 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.tbl1 + numFiles 2 + numRows 10 + rawDataSize 70 + serialization.ddl struct tbl1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 80 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tbl1 + name: default.tbl1 + Truncated Path -> Alias: + /tbl1 [a] + Map 7 + Map Operator Tree: + TableScan + alias: b + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + 
sort order: + + Map-reduce partition columns: key (type: int) + tag: 1 + value expressions: value (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.tbl2 + numFiles 2 + numRows 10 + rawDataSize 70 + serialization.ddl struct tbl2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 80 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.tbl2 + numFiles 2 + numRows 10 + rawDataSize 70 + serialization.ddl struct tbl2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 80 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tbl2 + name: default.tbl2 + Truncated Path -> Alias: + /tbl2 [b] + Map 8 + Map Operator Tree: + TableScan + alias: c + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: value is not null (type: boolean) + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + tag: 1 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.tbl3 + numFiles 2 + numRows 10 + rawDataSize 70 + serialization.ddl struct tbl3 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 80 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 2 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.tbl3 + numFiles 2 + numRows 10 + rawDataSize 70 + serialization.ddl struct tbl3 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 80 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.tbl3 + name: default.tbl3 + 
Truncated Path -> Alias: + /tbl3 [c] + Reducer 5 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col6 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + tag: 0 + value expressions: _col0 (type: int), _col6 (type: string) + auto parallelism: false + Reducer 6 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col6, _col11 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types int:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out index 1c81d1b..2071c1a 100644 --- ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out +++ ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out @@ -117,12 +117,13 @@ select a.key, a.value, b.value from tab a join tab_part b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -177,6 +178,50 @@ STAGE PLANS: Local Work: Map Reduce Local Work + Stage: Stage-2 + Spark + Edges: + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + value expressions: value (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: b + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + value expressions: value (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col7 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 
Stage: Stage-0 Fetch Operator limit: -1 @@ -198,12 +243,13 @@ from (select sum(substr(srcbucket_mapjoin.value,5)) as v1, key as k1 from srcbuc join tab b on a.k1 = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) @@ -284,6 +330,71 @@ STAGE PLANS: Local Work: Map Reduce Local Work + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (GROUP, 2) + Reducer 6 <- Map 7 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: srcbucket_mapjoin + Filter Operator + predicate: key is not null (type: boolean) + Group By Operator + aggregations: sum(substr(value, 5)) + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: double) + Map 7 + Map Operator Tree: + TableScan + alias: b + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + value expressions: value (type: string) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col1 (type: double), _col0 (type: int) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + value expressions: _col0 (type: double) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col3 + Select Operator + expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -301,13 +412,14 @@ from (select sum(substr(tab.value,5)) as v1, key as k1 from tab_part join tab on join tab b on a.k1 = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-3 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-3 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-4 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -326,7 +438,7 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -419,6 +531,93 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 9 (PARTITION-LEVEL SORT, 2) + Reducer 
8 <- Map 10 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Reducer 6 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 10 + Map Operator Tree: + TableScan + alias: b + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + value expressions: value (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: tab_part + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Map 9 + Map Operator Tree: + TableScan + alias: tab + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + value expressions: value (type: string) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col6, _col7 + Group By Operator + aggregations: sum(substr(_col7, 5)) + keys: _col6 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: double) + Reducer 7 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col1 (type: double), _col0 (type: int) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + value expressions: _col0 (type: double) + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col3 + Select Operator + expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -436,13 +635,14 @@ from (select sum(substr(x.value,5)) as v1, x.key as k1 from tab x join tab y on join tab_part b on a.k1 = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-2 + Stage-4 is a root stage has a backup stage: Stage-2 + Stage-3 depends on stages: Stage-4 has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -461,7 +661,7 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-2 + Stage: Stage-3 Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) @@ -554,6 +754,93 @@ STAGE PLANS: Local Work: Map Reduce Local Work + Stage: Stage-2 + Spark + Edges: + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 9 (PARTITION-LEVEL SORT, 2) + Reducer 8 <- Map 10 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Reducer 6 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 10 + Map Operator Tree: + TableScan + alias: b + 
Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + value expressions: value (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: x + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + value expressions: value (type: string) + Map 9 + Map Operator Tree: + TableScan + alias: y + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1 + Group By Operator + aggregations: sum(substr(_col1, 5)) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: double) + Reducer 7 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col1 (type: double), _col0 (type: int) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + value expressions: _col0 (type: double) + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col3 + Select Operator + expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -571,12 +858,13 @@ select a.key, a.value, b.value from tab_part a join tab b on a.key = b.key join tab c on a.key = c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -650,6 +938,61 @@ STAGE PLANS: Local Work: Map Reduce Local Work + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + value expressions: value (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: b + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + value expressions: value (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: c + Filter 
Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + outputColumnNames: _col0, _col1, _col7 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -665,12 +1008,13 @@ select a.key, a.value, c.value from (select x.key, x.value from tab_part x join tab y on x.key = y.key) a join tab c on a.key = c.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -749,6 +1093,72 @@ STAGE PLANS: Local Work: Map Reduce Local Work + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 8 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: x + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + value expressions: value (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: y + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Map 8 + Map Operator Tree: + TableScan + alias: c + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + value expressions: value (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: string) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -768,12 +1178,13 @@ from (select key, sum(substr(srcbucket_mapjoin.value,5)) as value from srcbucket join tab_part b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends 
on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) @@ -850,6 +1261,68 @@ STAGE PLANS: Local Work: Map Reduce Local Work + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (GROUP, 2) + Reducer 6 <- Map 7 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: srcbucket_mapjoin + Filter Operator + predicate: key is not null (type: boolean) + Group By Operator + aggregations: sum(substr(value, 5)) + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: double) + Map 7 + Map Operator Tree: + TableScan + alias: b + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + value expressions: value (type: string) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: double) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col3 + Select Operator + expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -867,12 +1340,13 @@ from (select key, sum(substr(srcbucket_mapjoin.value,5)) as value from srcbucket join tab_part b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) @@ -943,6 +1417,63 @@ STAGE PLANS: Local Work: Map Reduce Local Work + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (GROUP, 2) + Reducer 6 <- Map 7 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: srcbucket_mapjoin + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + value expressions: substr(value, 5) (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: b + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + value expressions: value (type: string) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: complete + 
outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: double) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col3 + Select Operator + expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -960,12 +1491,13 @@ select a.key, a.value, b.value from tab a join tab_part b on a.value = b.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -1020,6 +1552,49 @@ STAGE PLANS: Local Work: Map Reduce Local Work + Stage: Stage-2 + Spark + Edges: + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: value is not null (type: boolean) + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + value expressions: key (type: int) + Map 5 + Map Operator Tree: + TableScan + alias: b + Filter Operator + predicate: value is not null (type: boolean) + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col7 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col7 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -1057,12 +1632,13 @@ select a.key, a.value, b.value from tab1 a join tab_part b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -1117,6 +1693,50 @@ STAGE PLANS: Local Work: Map Reduce Local Work + Stage: Stage-2 + Spark + Edges: + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + value expressions: value (type: string) + Map 5 + Map Operator 
Tree: + TableScan + alias: b + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + value expressions: value (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col6 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -1128,12 +1748,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select a.key, b.key from tab_part a join tab_part c on a.key = c.key join tab_part b on a.value = b.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -1212,6 +1833,72 @@ STAGE PLANS: Local Work: Map Reduce Local Work + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 8 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + value expressions: value (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: c + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Map 8 + Map Operator Tree: + TableScan + alias: b + Filter Operator + predicate: value is not null (type: boolean) + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + value expressions: key (type: int) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + value expressions: _col0 (type: int) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col12 + Select Operator + expressions: _col0 (type: int), _col12 (type: int) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out index 04a934f..b81b32e 100644 --- 
ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out +++ ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out @@ -113,12 +113,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select a.key, b.key from tab_part a join tab_part c on a.key = c.key join tab_part b on a.value = b.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -197,6 +198,72 @@ STAGE PLANS: Local Work: Map Reduce Local Work + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 8 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + value expressions: value (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: c + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Map 8 + Map Operator Tree: + TableScan + alias: b + Filter Operator + predicate: value is not null (type: boolean) + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + value expressions: key (type: int) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + value expressions: _col0 (type: int) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col12 + Select Operator + expressions: _col0 (type: int), _col12 (type: int) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -234,12 +301,13 @@ select a.key, a.value, b.value from tab1 a join src b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -294,6 +362,50 @@ STAGE PLANS: Local Work: Map Reduce Local Work + Stage: Stage-2 + Spark + Edges: + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + 
Map-reduce partition columns: UDFToDouble(key) (type: double) + value expressions: key (type: int), value (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: b + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + value expressions: value (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col6 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -307,12 +419,13 @@ POSTHOOK: query: explain select a.key, b.key from (select key from tab_part where key > 1) a join (select key from tab_part where key > 2) b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -375,6 +488,54 @@ STAGE PLANS: Local Work: Map Reduce Local Work + Stage: Stage-2 + Spark + Edges: + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: tab_part + Filter Operator + predicate: (key > 1) (type: boolean) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Map 5 + Map Operator Tree: + TableScan + alias: tab_part + Filter Operator + predicate: (key > 2) (type: boolean) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -388,12 +549,13 @@ POSTHOOK: query: explain select a.key, b.key from (select key from tab_part where key > 1) a left outer join (select key from tab_part where key > 2) b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -456,6 +618,54 @@ STAGE PLANS: Local Work: Map Reduce Local 
Work + Stage: Stage-2 + Spark + Edges: + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: tab_part + Filter Operator + predicate: (key > 1) (type: boolean) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Map 5 + Map Operator Tree: + TableScan + alias: tab_part + Filter Operator + predicate: (key > 2) (type: boolean) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -469,12 +679,13 @@ POSTHOOK: query: explain select a.key, b.key from (select key from tab_part where key > 1) a right outer join (select key from tab_part where key > 2) b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -537,6 +748,54 @@ STAGE PLANS: Local Work: Map Reduce Local Work + Stage: Stage-2 + Spark + Edges: + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: tab_part + Filter Operator + predicate: (key > 1) (type: boolean) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Map 5 + Map Operator Tree: + TableScan + alias: tab_part + Filter Operator + predicate: (key > 2) (type: boolean) + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -548,12 +807,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select a.key, b.key from (select distinct key from tab) a join tab b on b.key = a.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root 
stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) @@ -627,6 +887,63 @@ STAGE PLANS: Local Work: Map Reduce Local Work + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (GROUP, 2) + Reducer 6 <- Map 7 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: tab + Filter Operator + predicate: key is not null (type: boolean) + Group By Operator + keys: key (type: int) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Map 7 + Map Operator Tree: + TableScan + alias: b + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Reducer 5 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -638,12 +955,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select a.value, b.value from (select distinct value from tab) a join tab b on b.key = a.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) @@ -717,6 +1035,65 @@ STAGE PLANS: Local Work: Map Reduce Local Work + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (GROUP, 2) + Reducer 6 <- Map 7 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: tab + Filter Operator + predicate: UDFToDouble(value) is not null (type: boolean) + Group By Operator + keys: value (type: string) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: b + Filter Operator + predicate: UDFToDouble(key) is not null (type: boolean) + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + value expressions: value (type: string) + Reducer 5 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + 
Map-reduce partition columns: UDFToDouble(_col0) (type: double) + value expressions: _col0 (type: string) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col2 + Select Operator + expressions: _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_2.q.out ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_2.q.out index 365306e..2807429 100644 --- ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_2.q.out +++ ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_2.q.out @@ -95,13 +95,14 @@ FROM test_table1 a JOIN test_table2 b ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-4 is a root stage has a backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -184,6 +185,60 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + value expressions: value (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: b + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + value expressions: value (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col7 + Select Operator + expressions: _col0 (type: int), concat(_col1, _col7) (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col0 (type: int), _col1 (type: string) + Reducer 6 + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) FROM test_table1 a JOIN test_table2 b @@ -191,8 +246,10 @@ ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' PREHOOK: type: QUERY PREHOOK: Input: default@test_table1 PREHOOK: Input: default@test_table1@ds=1 +PREHOOK: Input: default@test_table1@ds=2 PREHOOK: Input: default@test_table2 PREHOOK: Input: default@test_table2@ds=1 +PREHOOK: Input: 
default@test_table2@ds=2 PREHOOK: Output: default@test_table3@ds=1 POSTHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) @@ -201,8 +258,10 @@ ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' POSTHOOK: type: QUERY POSTHOOK: Input: default@test_table1 POSTHOOK: Input: default@test_table1@ds=1 +POSTHOOK: Input: default@test_table1@ds=2 POSTHOOK: Input: default@test_table2 POSTHOOK: Input: default@test_table2@ds=1 +POSTHOOK: Input: default@test_table2@ds=2 POSTHOOK: Output: default@test_table3@ds=1 POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ] @@ -265,13 +324,14 @@ FROM test_table1 a JOIN test_table2 b ON a.key = b.key WHERE a.ds is not null and b.ds = '1' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-4 is a root stage has a backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -354,6 +414,60 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + value expressions: value (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: b + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + value expressions: value (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col7 + Select Operator + expressions: _col0 (type: int), concat(_col1, _col7) (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col0 (type: int), _col1 (type: string) + Reducer 6 + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) FROM test_table1 a JOIN test_table2 b @@ -364,6 +478,7 @@ PREHOOK: Input: default@test_table1@ds=1 PREHOOK: Input: default@test_table1@ds=2 PREHOOK: Input: default@test_table2 PREHOOK: Input: default@test_table2@ds=1 +PREHOOK: Input: default@test_table2@ds=2 PREHOOK: Output: default@test_table3@ds=1 POSTHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) @@ -375,6 +490,7 @@ 
POSTHOOK: Input: default@test_table1@ds=1 POSTHOOK: Input: default@test_table1@ds=2 POSTHOOK: Input: default@test_table2 POSTHOOK: Input: default@test_table2@ds=1 +POSTHOOK: Input: default@test_table2@ds=2 POSTHOOK: Output: default@test_table3@ds=1 POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)a.FieldSchema(name:value, type:string, comment:null), (test_table2)b.FieldSchema(name:value, type:string, comment:null), ] @@ -459,13 +575,14 @@ FROM test_table1 a JOIN test_table2 b ON a.key = b.key WHERE a.ds = '1' and b.ds is not null POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-4 is a root stage has a backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -548,6 +665,60 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + value expressions: value (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: b + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + value expressions: value (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col7 + Select Operator + expressions: _col0 (type: int), concat(_col1, _col7) (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col0 (type: int), _col1 (type: string) + Reducer 6 + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) FROM test_table1 a JOIN test_table2 b @@ -555,6 +726,7 @@ ON a.key = b.key WHERE a.ds = '1' and b.ds is not null PREHOOK: type: QUERY PREHOOK: Input: default@test_table1 PREHOOK: Input: default@test_table1@ds=1 +PREHOOK: Input: default@test_table1@ds=2 PREHOOK: Input: default@test_table2 PREHOOK: Input: default@test_table2@ds=1 PREHOOK: Input: default@test_table2@ds=2 @@ -566,6 +738,7 @@ ON a.key = b.key WHERE a.ds = '1' and b.ds is not null POSTHOOK: type: QUERY POSTHOOK: Input: default@test_table1 POSTHOOK: Input: default@test_table1@ds=1 +POSTHOOK: Input: default@test_table1@ds=2 POSTHOOK: Input: default@test_table2 POSTHOOK: Input: default@test_table2@ds=1 POSTHOOK: Input: default@test_table2@ds=2 @@ -657,13 +830,14 @@ JOIN ON 
a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-4 is a root stage has a backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -754,6 +928,66 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: test_table1 + Filter Operator + predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: test_table2 + Filter Operator + predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col3 + Select Operator + expressions: _col0 (type: int), concat(_col1, _col3) (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col0 (type: int), _col1 (type: string) + Reducer 6 + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) FROM @@ -764,8 +998,10 @@ ON a.key = b.key PREHOOK: type: QUERY PREHOOK: Input: default@test_table1 PREHOOK: Input: default@test_table1@ds=1 +PREHOOK: Input: default@test_table1@ds=2 PREHOOK: Input: default@test_table2 PREHOOK: Input: default@test_table2@ds=1 +PREHOOK: Input: default@test_table2@ds=2 PREHOOK: Output: default@test_table3@ds=1 POSTHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) @@ -777,8 +1013,10 @@ ON a.key = b.key POSTHOOK: type: QUERY POSTHOOK: Input: default@test_table1 POSTHOOK: Input: default@test_table1@ds=1 +POSTHOOK: Input: default@test_table1@ds=2 POSTHOOK: Input: default@test_table2 POSTHOOK: Input: default@test_table2@ds=1 +POSTHOOK: Input: default@test_table2@ds=2 POSTHOOK: Output: default@test_table3@ds=1 POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), (test_table2)test_table2.FieldSchema(name:value, type:string, comment:null), ] @@ -845,13 +1083,14 @@ 
JOIN ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-4 is a root stage has a backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -942,6 +1181,66 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: test_table1 + Filter Operator + predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: int), concat(value, value) (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: test_table2 + Filter Operator + predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: int), concat(value, value) (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col3 + Select Operator + expressions: _col0 (type: int), concat(_col1, _col3) (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col0 (type: int), _col1 (type: string) + Reducer 6 + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.v1, b.v2) FROM @@ -952,8 +1251,10 @@ ON a.key = b.key PREHOOK: type: QUERY PREHOOK: Input: default@test_table1 PREHOOK: Input: default@test_table1@ds=1 +PREHOOK: Input: default@test_table1@ds=2 PREHOOK: Input: default@test_table2 PREHOOK: Input: default@test_table2@ds=1 +PREHOOK: Input: default@test_table2@ds=2 PREHOOK: Output: default@test_table3@ds=1 POSTHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.v1, b.v2) @@ -965,8 +1266,10 @@ ON a.key = b.key POSTHOOK: type: QUERY POSTHOOK: Input: default@test_table1 POSTHOOK: Input: default@test_table1@ds=1 +POSTHOOK: Input: default@test_table1@ds=2 POSTHOOK: Input: default@test_table2 POSTHOOK: Input: default@test_table2@ds=1 +POSTHOOK: Input: default@test_table2@ds=2 POSTHOOK: Output: default@test_table3@ds=1 POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), (test_table2)test_table2.FieldSchema(name:value, type:string, 
comment:null), ] @@ -1033,13 +1336,14 @@ JOIN ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-4 is a root stage has a backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -1130,6 +1434,66 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: test_table1 + Filter Operator + predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: test_table2 + Filter Operator + predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col3 + Select Operator + expressions: (_col0 + _col0) (type: int), concat(_col1, _col3) (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col0 (type: int), _col1 (type: string) + Reducer 6 + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key+a.key, concat(a.value, b.value) FROM @@ -1140,8 +1504,10 @@ ON a.key = b.key PREHOOK: type: QUERY PREHOOK: Input: default@test_table1 PREHOOK: Input: default@test_table1@ds=1 +PREHOOK: Input: default@test_table1@ds=2 PREHOOK: Input: default@test_table2 PREHOOK: Input: default@test_table2@ds=1 +PREHOOK: Input: default@test_table2@ds=2 PREHOOK: Output: default@test_table3@ds=1 POSTHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key+a.key, concat(a.value, b.value) @@ -1153,8 +1519,10 @@ ON a.key = b.key POSTHOOK: type: QUERY POSTHOOK: Input: default@test_table1 POSTHOOK: Input: default@test_table1@ds=1 +POSTHOOK: Input: default@test_table1@ds=2 POSTHOOK: Input: default@test_table2 POSTHOOK: Input: default@test_table2@ds=1 +POSTHOOK: Input: default@test_table2@ds=2 POSTHOOK: Output: default@test_table3@ds=1 POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key EXPRESSION [(test_table1)test_table1.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value EXPRESSION [(test_table1)test_table1.FieldSchema(name:value, type:string, comment:null), 
(test_table2)test_table2.FieldSchema(name:value, type:string, comment:null), ] diff --git ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_4.q.out ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_4.q.out index 3846de7..667097c 100644 --- ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_4.q.out +++ ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_4.q.out @@ -71,13 +71,14 @@ FROM test_table1 a JOIN test_table2 b ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-4 is a root stage has a backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -160,6 +161,60 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + value expressions: value (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: b + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + value expressions: value (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col7 + Select Operator + expressions: _col0 (type: int), _col0 (type: int), concat(_col1, _col7) (type: string) + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Reducer 6 + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, a.key, concat(a.value, b.value) FROM test_table1 a JOIN test_table2 b @@ -260,13 +315,14 @@ FROM test_table1 a JOIN test_table2 b ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-4 is a root stage has a backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -345,6 +401,56 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map 
Operator Tree: + TableScan + alias: a + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + value expressions: value (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: b + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + value expressions: _col0 (type: int), _col1 (type: string) + Reducer 6 + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, a.value FROM test_table1 a JOIN test_table2 b diff --git ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_6.q.out ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_6.q.out index 5b559c4..d950843 100644 --- ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_6.q.out +++ ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_6.q.out @@ -73,13 +73,14 @@ FROM test_table1 a JOIN test_table2 b ON a.key = b.key and a.key2 = b.key2 WHERE a.ds = '1' and b.ds = '1' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-4 is a root stage has a backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -162,6 +163,60 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: (key is not null and key2 is not null) (type: boolean) + Reduce Output Operator + key expressions: key (type: int), key2 (type: int) + sort order: ++ + Map-reduce partition columns: key (type: int), key2 (type: int) + value expressions: value (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: b + Filter Operator + predicate: (key is not null and key2 is not null) (type: boolean) + Reduce Output Operator + key expressions: key (type: int), key2 (type: int) + sort order: ++ + Map-reduce partition columns: key (type: int), key2 (type: int) + value expressions: value (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col2, _col9 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), concat(_col2, _col9) (type: string) + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: +- + Map-reduce partition columns: _col0 (type: int), 
_col1 (type: int) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Reducer 6 + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, a.key2, concat(a.value, b.value) FROM test_table1 a JOIN test_table2 b @@ -250,13 +305,14 @@ ON a.key = b.key and a.key2 = b.key2 WHERE a.ds = '1' and b.ds = '1' )subq1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-4 is a root stage has a backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -339,6 +395,60 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: (key is not null and key2 is not null) (type: boolean) + Reduce Output Operator + key expressions: key (type: int), key2 (type: int) + sort order: ++ + Map-reduce partition columns: key (type: int), key2 (type: int) + value expressions: value (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: b + Filter Operator + predicate: (key is not null and key2 is not null) (type: boolean) + Reduce Output Operator + key expressions: key (type: int), key2 (type: int) + sort order: ++ + Map-reduce partition columns: key (type: int), key2 (type: int) + value expressions: value (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col2, _col9 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), concat(_col2, _col9) (type: string) + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: +- + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Reducer 6 + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT subq1.key, subq1.key2, subq1.value from ( @@ -427,13 +537,14 @@ FROM test_table1 a JOIN test_table2 b ON a.key = b.key and a.key2 = b.key2 WHERE a.ds = '1' and b.ds = '1' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-4 is a root stage has a backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ 
-516,6 +627,60 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: (key is not null and key2 is not null) (type: boolean) + Reduce Output Operator + key expressions: key (type: int), key2 (type: int) + sort order: ++ + Map-reduce partition columns: key (type: int), key2 (type: int) + value expressions: value (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: b + Filter Operator + predicate: (key is not null and key2 is not null) (type: boolean) + Reduce Output Operator + key expressions: key (type: int), key2 (type: int) + sort order: ++ + Map-reduce partition columns: key (type: int), key2 (type: int) + value expressions: value (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col2, _col9 + Select Operator + expressions: _col1 (type: int), _col0 (type: int), concat(_col2, _col9) (type: string) + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: +- + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Reducer 6 + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table3 + PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table -- This should be a map-reduce operation EXPLAIN @@ -539,13 +704,14 @@ ON a.key = b.key and a.key2 = b.key2 WHERE a.ds = '1' and b.ds = '1' )subq1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-4 is a root stage has a backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -628,6 +794,60 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: (key is not null and key2 is not null) (type: boolean) + Reduce Output Operator + key expressions: key (type: int), key2 (type: int) + sort order: ++ + Map-reduce partition columns: key (type: int), key2 (type: int) + value expressions: value (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: b + Filter Operator + predicate: (key is not null and key2 is not null) (type: boolean) + Reduce Output Operator + key expressions: key (type: int), key2 (type: int) + sort order: ++ + Map-reduce partition columns: key (type: int), key2 (type: int) + value expressions: value (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, 
_col2, _col9 + Select Operator + expressions: _col1 (type: int), _col0 (type: int), concat(_col2, _col9) (type: string) + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: +- + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Reducer 6 + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table3 + PREHOOK: query: -- Insert data into the bucketed table by selecting from another bucketed table -- This should be a map-only operation EXPLAIN @@ -657,13 +877,14 @@ ON a.key = b.key and a.key2 = b.key2 WHERE a.ds = '1' and b.ds = '1' )subq2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-4 is a root stage has a backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -746,6 +967,60 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: (key is not null and key2 is not null) (type: boolean) + Reduce Output Operator + key expressions: key (type: int), key2 (type: int) + sort order: ++ + Map-reduce partition columns: key (type: int), key2 (type: int) + value expressions: value (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: b + Filter Operator + predicate: (key is not null and key2 is not null) (type: boolean) + Reduce Output Operator + key expressions: key (type: int), key2 (type: int) + sort order: ++ + Map-reduce partition columns: key (type: int), key2 (type: int) + value expressions: value (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col2, _col9 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), concat(_col2, _col9) (type: string) + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: +- + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Reducer 6 + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT subq2.key, subq2.key2, subq2.value from ( @@ -852,13 +1127,14 @@ ON a.key = b.key and a.key2 = b.key2 WHERE a.ds = '1' and b.ds = '1' )subq2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-4 is a root stage has a backup 
stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -941,6 +1217,60 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: (key is not null and key2 is not null) (type: boolean) + Reduce Output Operator + key expressions: key (type: int), key2 (type: int) + sort order: ++ + Map-reduce partition columns: key (type: int), key2 (type: int) + value expressions: value (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: b + Filter Operator + predicate: (key is not null and key2 is not null) (type: boolean) + Reduce Output Operator + key expressions: key (type: int), key2 (type: int) + sort order: ++ + Map-reduce partition columns: key (type: int), key2 (type: int) + value expressions: value (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col2, _col9 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), concat(_col2, _col9) (type: string) + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: +- + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Reducer 6 + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT subq2.k2, subq2.k1, subq2.value from ( @@ -1057,13 +1387,14 @@ ON a.key = b.key and a.key2 = b.key2 WHERE a.ds = '1' and b.ds = '1' )subq2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-4 is a root stage has a backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -1146,3 +1477,57 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: (key is not null and key2 is not null) (type: boolean) + Reduce Output Operator + key expressions: key (type: int), key2 (type: int) + sort order: ++ + Map-reduce partition columns: key (type: int), key2 (type: int) + value expressions: value (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: b + Filter Operator + predicate: (key is not null and key2 is not null) (type: boolean) + Reduce Output Operator + key expressions: key (type: int), key2 (type: int) + sort order: 
++ + Map-reduce partition columns: key (type: int), key2 (type: int) + value expressions: value (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col2, _col9 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), concat(_col2, _col9) (type: string) + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: -- + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Reducer 6 + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table4 + diff --git ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_7.q.out ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_7.q.out index cefc6aa..e057ec1 100644 --- ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_7.q.out +++ ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_7.q.out @@ -73,13 +73,14 @@ ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' and (a.key = 0 or a.key = 5) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-4 is a root stage has a backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -162,6 +163,60 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: (key is not null and ((key = 0) or (key = 5))) (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + value expressions: value (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: b + Filter Operator + predicate: (key is not null and ((key = 0) or (key = 5))) (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + value expressions: value (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col7 + Select Operator + expressions: _col0 (type: int), concat(_col1, _col7) (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col0 (type: int), _col1 (type: string) + Reducer 6 + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') 
SELECT a.key, concat(a.value, b.value) FROM test_table1 a JOIN test_table2 b @@ -245,13 +300,14 @@ JOIN ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-4 is a root stage has a backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -342,6 +398,66 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: test_table1 + Filter Operator + predicate: (((key = 0) or (key = 5)) and key is not null) (type: boolean) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: test_table2 + Filter Operator + predicate: (((key = 0) or (key = 5)) and key is not null) (type: boolean) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col3 + Select Operator + expressions: _col0 (type: int), concat(_col1, _col3) (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col0 (type: int), _col1 (type: string) + Reducer 6 + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) FROM @@ -431,13 +547,14 @@ ON a.key = b.key WHERE a.key = 0 or a.key = 5 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-4 is a root stage has a backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -528,6 +645,66 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: test_table1 + Filter Operator + predicate: (((key < 8) and key is not null) and ((key = 0) or (key = 5))) (type: boolean) + Select Operator + expressions: key (type: int), value (type: string) + 
outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: test_table2 + Filter Operator + predicate: (((key < 8) and key is not null) and ((key = 0) or (key = 5))) (type: boolean) + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col3 + Select Operator + expressions: _col0 (type: int), concat(_col1, _col3) (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col0 (type: int), _col1 (type: string) + Reducer 6 + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) FROM diff --git ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_8.q.out ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_8.q.out index ca44d7c..6bfcff5 100644 --- ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_8.q.out +++ ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_8.q.out @@ -71,13 +71,14 @@ FROM test_table1 a JOIN test_table2 b ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-4 is a root stage has a backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -160,6 +161,60 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + value expressions: value (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: b + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + value expressions: value (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col6, _col7 + Select Operator + expressions: _col0 (type: int), _col6 (type: int), concat(_col1, _col7) (type: string) + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key 
expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Reducer 6 + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, b.key, concat(a.value, b.value) FROM test_table1 a JOIN test_table2 b @@ -242,13 +297,14 @@ FROM test_table1 a JOIN test_table2 b ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-4 is a root stage has a backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -331,6 +387,60 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + value expressions: value (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: b + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + value expressions: value (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col6, _col7 + Select Operator + expressions: _col6 (type: int), _col0 (type: int), concat(_col1, _col7) (type: string) + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Reducer 6 + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT b.key, a.key, concat(a.value, b.value) FROM test_table1 a JOIN test_table2 b diff --git ql/src/test/results/clientpositive/spark/cross_product_check_2.q.out ql/src/test/results/clientpositive/spark/cross_product_check_2.q.out index dda6c38..1aa99d8 100644 --- ql/src/test/results/clientpositive/spark/cross_product_check_2.q.out +++ ql/src/test/results/clientpositive/spark/cross_product_check_2.q.out @@ -24,18 +24,19 @@ POSTHOOK: type: CREATETABLE_AS_SELECT POSTHOOK: Input: default@src POSTHOOK: Output: database:default POSTHOOK: Output: default@B -Warning: Map Join MAPJOIN[7][bigTable=a] in task 'Stage-1:MAPRED' is a cross product +Warning: 
Map Join MAPJOIN[14][bigTable=a] in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select * from A join B PREHOOK: type: QUERY POSTHOOK: query: explain select * from A join B POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -84,25 +85,62 @@ STAGE PLANS: Local Work: Map Reduce Local Work + Stage: Stage-2 + Spark + Edges: + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: a + Reduce Output Operator + sort order: + value expressions: key (type: string), value (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: b + Reduce Output Operator + sort order: + value expressions: key (type: string), value (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: ListSink -Warning: Map Join MAPJOIN[15][bigTable=a] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[28][bigTable=a] in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select * from B d1 join B d2 on d1.key = d2.key join A PREHOOK: type: QUERY POSTHOOK: query: explain select * from B d1 join B d2 on d1.key = d2.key join A POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-2 + Stage-4 is a root stage has a backup stage: Stage-2 + Stage-3 depends on stages: Stage-4 has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -121,7 +159,7 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -183,13 +221,74 @@ STAGE PLANS: Local Work: Map Reduce Local Work + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 8 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: d1 + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: d2 + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value 
expressions: value (type: string) + Map 8 + Map Operator Tree: + TableScan + alias: a + Reduce Output Operator + sort order: + value expressions: key (type: string), value (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col5, _col6 + Reduce Output Operator + sort order: + value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: ListSink -Warning: Map Join MAPJOIN[21][bigTable=a] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[37][bigTable=a] in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select * from A join (select d1.key from B d1 join B d2 on d1.key = d2.key @@ -201,13 +300,14 @@ POSTHOOK: query: explain select * from A join where 1 = 1 group by d1.key) od1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-2 + Stage-4 is a root stage has a backup stage: Stage-2 + Stage-3 depends on stages: Stage-4 has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -226,7 +326,7 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-2 + Stage: Stage-3 Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) @@ -309,26 +409,101 @@ STAGE PLANS: Local Work: Map Reduce Local Work + Stage: Stage-2 + Spark + Edges: + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 9 (PARTITION-LEVEL SORT, 2) + Reducer 8 <- Map 10 (PARTITION-LEVEL SORT, 1), Reducer 7 (PARTITION-LEVEL SORT, 1) + Reducer 7 <- Reducer 6 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 10 + Map Operator Tree: + TableScan + alias: a + Reduce Output Operator + sort order: + value expressions: key (type: string), value (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: d1 + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Map 9 + Map Operator Tree: + TableScan + alias: d2 + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0 + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reducer 7 + Reduce Operator Tree: + Group By Operator + keys: 
KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: string) + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col5 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: ListSink -Warning: Map Join MAPJOIN[17][bigTable=a] in task 'Stage-1:MAPRED' is a cross product -Warning: Map Join MAPJOIN[18][bigTable=d1] in task 'Stage-2:MAPRED' is a cross product +Warning: Map Join MAPJOIN[31][bigTable=a] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[32][bigTable=d1] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: explain select * from A join (select d1.key from B d1 join B d2 where 1 = 1 group by d1.key) od1 PREHOOK: type: QUERY POSTHOOK: query: explain select * from A join (select d1.key from B d1 join B d2 where 1 = 1 group by d1.key) od1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-2 + Stage-4 is a root stage has a backup stage: Stage-2 + Stage-3 depends on stages: Stage-4 has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -344,7 +519,7 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-2 + Stage: Stage-3 Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) @@ -424,13 +599,80 @@ STAGE PLANS: Local Work: Map Reduce Local Work + Stage: Stage-2 + Spark + Edges: + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 1), Map 9 (PARTITION-LEVEL SORT, 1) + Reducer 8 <- Map 10 (PARTITION-LEVEL SORT, 1), Reducer 7 (PARTITION-LEVEL SORT, 1) + Reducer 7 <- Reducer 6 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 10 + Map Operator Tree: + TableScan + alias: a + Reduce Output Operator + sort order: + value expressions: key (type: string), value (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: d1 + Reduce Output Operator + sort order: + value expressions: key (type: string) + Map 9 + Map Operator Tree: + TableScan + alias: d2 + Reduce Output Operator + sort order: + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0 + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reducer 7 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: string) + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col5 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + 
table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: ListSink -Warning: Map Join MAPJOIN[26][bigTable=?] in task 'Stage-1:MAPRED' is a cross product +Warning: Map Join MAPJOIN[46][bigTable=?] in task 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select * from (select A.key from A group by key) ss join (select d1.key from B d1 join B d2 on d1.key = d2.key where 1 = 1 group by d1.key) od1 @@ -440,13 +682,14 @@ POSTHOOK: query: explain select * from (select d1.key from B d1 join B d2 on d1.key = d2.key where 1 = 1 group by d1.key) od1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-2 depends on stages: Stage-3 - Stage-1 depends on stages: Stage-2 + Stage-4 is a root stage has a backup stage: Stage-2 + Stage-3 depends on stages: Stage-4 has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -465,7 +708,7 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-2 + Stage: Stage-3 Spark Edges: Reducer 4 <- Map 3 (GROUP, 2) @@ -571,6 +814,98 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 10 <- Map 12 (PARTITION-LEVEL SORT, 2), Map 9 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Map 6 (GROUP, 2) + Reducer 11 <- Reducer 10 (GROUP, 2) + Reducer 8 <- Reducer 11 (PARTITION-LEVEL SORT, 1), Reducer 7 (PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 12 + Map Operator Tree: + TableScan + alias: d2 + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: a + Select Operator + expressions: key (type: string) + outputColumnNames: key + Group By Operator + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Map 9 + Map Operator Tree: + TableScan + alias: d1 + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Reducer 10 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0 + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reducer 11 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: string) + Reducer 7 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: string) + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + 
Inner Join 0 to 1 + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/identity_project_remove_skip.q.out ql/src/test/results/clientpositive/spark/identity_project_remove_skip.q.out index 7238009..2448167 100644 --- ql/src/test/results/clientpositive/spark/identity_project_remove_skip.q.out +++ ql/src/test/results/clientpositive/spark/identity_project_remove_skip.q.out @@ -17,12 +17,13 @@ from where t2.value='val_105' and t3.key='105' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark Edges: Reducer 2 <- Map 1 (SORT, 2) @@ -100,6 +101,63 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 6 <- Map 5 (SORT, 2) + Reducer 9 <- Map 8 (SORT, 2) + Reducer 7 <- Reducer 6 (PARTITION-LEVEL SORT, 2), Reducer 9 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 5 + Map Operator Tree: + TableScan + alias: src + Filter Operator + predicate: ((key is not null and (value = 'val_105')) and (key = '105')) (type: boolean) + Select Operator + Reduce Output Operator + key expressions: '105' (type: string) + sort order: + + Map 8 + Map Operator Tree: + TableScan + alias: src + Filter Operator + predicate: (key = '105') (type: boolean) + Select Operator + Reduce Output Operator + key expressions: '105' (type: string) + sort order: + + Reducer 6 + Reduce Operator Tree: + Select Operator + Reduce Output Operator + key expressions: '105' (type: string) + sort order: + + Map-reduce partition columns: '105' (type: string) + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Select Operator + expressions: '105' (type: string), 'val_105' (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 9 + Reduce Operator Tree: + Select Operator + Reduce Output Operator + key expressions: '105' (type: string) + sort order: + + Map-reduce partition columns: '105' (type: string) + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/infer_bucket_sort_convert_join.q.out ql/src/test/results/clientpositive/spark/infer_bucket_sort_convert_join.q.out index 3d4eb18..f608a2e 100644 --- ql/src/test/results/clientpositive/spark/infer_bucket_sort_convert_join.q.out +++ ql/src/test/results/clientpositive/spark/infer_bucket_sort_convert_join.q.out @@ -78,7 +78,8 @@ PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 Status: Failed -Status: Failed +FAILED: Execution Error, return code 3 from 
org.apache.hadoop.hive.ql.exec.spark.SparkTask +ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.spark.SparkTask POSTHOOK: query: -- This test tests the scenario when the mapper dies. So, create a conditional task for the mapjoin. -- Tests a join which is not converted to a map join, the output should be bucketed and sorted. @@ -114,10 +115,10 @@ Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true - numFiles 0 + numFiles 2 numRows 0 rawDataSize 0 - totalSize 0 + totalSize 11996 #### A masked pattern was here #### # Storage Information diff --git ql/src/test/results/clientpositive/spark/join28.q.out ql/src/test/results/clientpositive/spark/join28.q.out index f23f662..cc42c7d 100644 --- ql/src/test/results/clientpositive/spark/join28.q.out +++ ql/src/test/results/clientpositive/spark/join28.q.out @@ -31,13 +31,14 @@ FROM JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-4 is a root stage has a backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -146,6 +147,83 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Spark + Edges: + Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 2), Map 8 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: z + Filter Operator + predicate: ((11.0 = 11.0) and key is not null) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: y + Filter Operator + predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Map 8 + Map Operator Tree: + TableScan + alias: x + Filter Operator + predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col1, _col4 + Select Operator + expressions: _col4 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col1 + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce 
partition columns: _col0 (type: string) + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT subq.key1, z.value FROM @@ -157,6 +235,9 @@ PREHOOK: Input: default@src PREHOOK: Input: default@src1 PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Output: default@dest_j1 POSTHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT subq.key1, z.value @@ -169,6 +250,9 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Output: default@dest_j1 POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] diff --git ql/src/test/results/clientpositive/spark/join29.q.out ql/src/test/results/clientpositive/spark/join29.q.out index 0b4284c..b4cdb3d 100644 --- ql/src/test/results/clientpositive/spark/join29.q.out +++ ql/src/test/results/clientpositive/spark/join29.q.out @@ -27,13 +27,14 @@ FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN (select y.key, count(1) as cnt from src y group by y.key) subq2 ON (subq1.key = subq2.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-4 is a root stage has a backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) @@ -146,6 +147,87 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Spark + Edges: + Reducer 6 <- Map 5 (GROUP, 2) + Reducer 9 <- Map 8 (GROUP, 2) + Reducer 7 <- Reducer 6 (PARTITION-LEVEL SORT, 2), Reducer 9 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 5 + Map Operator Tree: + TableScan + alias: x + Filter Operator + predicate: key is not null (type: boolean) + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: bigint) + Map 8 + Map Operator Tree: + TableScan + alias: y + Filter Operator + predicate: key is not null (type: boolean) + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: bigint) + Reducer 6 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: bigint) + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: 
_col0, _col1, _col3 + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), UDFToInteger(_col3) (type: int) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + Reducer 9 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: bigint) + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT subq1.key, subq1.cnt, subq2.cnt FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN diff --git ql/src/test/results/clientpositive/spark/join31.q.out ql/src/test/results/clientpositive/spark/join31.q.out index a52a8b6..0f9bffe 100644 --- ql/src/test/results/clientpositive/spark/join31.q.out +++ ql/src/test/results/clientpositive/spark/join31.q.out @@ -29,13 +29,14 @@ FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN group by subq1.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-4 is a root stage has a backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) @@ -177,6 +178,109 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Spark + Edges: + Reducer 11 <- Map 10 (GROUP, 2) + Reducer 7 <- Map 6 (GROUP, 2) + Reducer 8 <- Reducer 11 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 9 <- Reducer 8 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 10 + Map Operator Tree: + TableScan + alias: y + Filter Operator + predicate: key is not null (type: boolean) + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: bigint) + Map 6 + Map Operator Tree: + TableScan + alias: x + Filter Operator + predicate: key is not null (type: boolean) + Group By Operator + aggregations: count(1) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: bigint) + Reducer 11 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reducer 7 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: 
_col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: bigint) + Reducer 9 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT subq1.key, count(1) as cnt FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN diff --git ql/src/test/results/clientpositive/spark/join32.q.out ql/src/test/results/clientpositive/spark/join32.q.out index a9d50b4..a831bac 100644 --- ql/src/test/results/clientpositive/spark/join32.q.out +++ ql/src/test/results/clientpositive/spark/join32.q.out @@ -100,13 +100,14 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-4 is a root stage has a backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -401,6 +402,400 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Spark + Edges: + Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 2), Map 8 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: y + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 
+ input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [y] + Map 6 + Map Operator Tree: + TableScan + alias: z + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((11.0 = 11.0) and value is not null) (type: boolean) + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + tag: 0 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 12 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [z] + /srcpart/ds=2008-04-08/hr=12 [z] + /srcpart/ds=2008-04-09/hr=11 [z] + /srcpart/ds=2008-04-09/hr=12 [z] + Map 8 + Map Operator Tree: + TableScan + alias: x + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (value is not null and key is not null) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + tag: 1 + value expressions: _col0 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src1 + name: default.src1 + Truncated Path -> Alias: + /src1 [x] + Reducer 5 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col1, _col2, _col5 + Select Operator + expressions: _col5 (type: string), _col2 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j1 + serialization.ddl struct dest_j1 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 7 + Needs Tagging: true + Reduce Operator 
Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col3 + Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Map-reduce partition columns: _col3 (type: string) + tag: 1 + value expressions: _col0 (type: string) + auto parallelism: false + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, z.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) @@ -410,6 +805,9 @@ PREHOOK: Input: default@src PREHOOK: Input: default@src1 PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Output: default@dest_j1 POSTHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, z.value, y.value @@ -420,6 +818,9 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Output: default@dest_j1 POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] diff --git ql/src/test/results/clientpositive/spark/join32_lessSize.q.out ql/src/test/results/clientpositive/spark/join32_lessSize.q.out index dac9610..5045354 100644 --- ql/src/test/results/clientpositive/spark/join32_lessSize.q.out +++ ql/src/test/results/clientpositive/spark/join32_lessSize.q.out @@ -108,13 +108,14 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-4 is a root stage has a backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -409,248 +410,36 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### -PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 -SELECT x.key, z.value, y.value -FROM src1 x JOIN src y ON (x.key = y.key) -JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Output: default@dest_j1 -POSTHOOK: query: INSERT OVERWRITE TABLE dest_j1 -SELECT x.key, z.value, y.value -FROM src1 x JOIN src y ON (x.key = y.key) -JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Output: default@dest_j1 -POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.value EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: select * from dest_j1 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest_j1 -#### A masked pattern was here #### -POSTHOOK: query: select * from dest_j1 -POSTHOOK: type: QUERY 
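
Note on the STAGE DEPENDENCIES changes throughout these golden files: every stage of the converted map-join chain now carries a pointer to the untouched common-join stage, and explain renders it as "has a backup stage: Stage-N", while the backup stage itself is listed bare. A minimal sketch of how such a summary can be produced, assuming a simplified Stage model (hypothetical names, not Hive's actual Task/ConditionalTask API):

import java.util.ArrayList;
import java.util.List;

final class Stage {
  final String id;
  final List<Stage> parents = new ArrayList<>();
  Stage backup; // the original common-join plan, kept unexecuted

  Stage(String id) { this.id = id; }

  // Renders one "Stage-X ..." dependency line in the style of the explain output above.
  String describe() {
    StringBuilder sb = new StringBuilder(id);
    if (parents.isEmpty()) {
      sb.append(" is a root stage");
    } else {
      sb.append(" depends on stages: ");
      for (int i = 0; i < parents.size(); i++) {
        if (i > 0) sb.append(", ");
        sb.append(parents.get(i).id);
      }
    }
    if (backup != null) sb.append(" has a backup stage: ").append(backup.id);
    return sb.toString();
  }

  public static void main(String[] args) {
    Stage s2 = new Stage("Stage-2"); // common-join backup plan
    Stage s4 = new Stage("Stage-4");
    Stage s3 = new Stage("Stage-3"); s3.parents.add(s4);
    Stage s1 = new Stage("Stage-1"); s1.parents.add(s3);
    Stage s0 = new Stage("Stage-0"); s0.parents.add(s1);
    // the whole map-join chain shares a single backup stage
    s4.backup = s2; s3.backup = s2; s1.backup = s2;
    for (Stage s : new Stage[] {s4, s3, s1}) System.out.println(s.describe());
    System.out.println(s2.id); // the backup stage is listed without dependencies
    System.out.println(s0.describe());
  }
}

Run as-is, this prints the same five dependency lines shown in the first auto_join output in this patch (Stage-4/Stage-3/Stage-1 each backed by Stage-2, then Stage-2 bare, then Stage-0).
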
-POSTHOOK: Input: default@dest_j1 -#### A masked pattern was here #### -146 val_146 val_146 -146 val_146 val_146 -146 val_146 val_146 -146 val_146 val_146 -150 val_150 val_150 -213 val_213 val_213 -213 val_213 val_213 -213 val_213 val_213 -213 val_213 val_213 -238 val_238 val_238 -238 val_238 val_238 -238 val_238 val_238 -238 val_238 val_238 -255 val_255 val_255 -255 val_255 val_255 -255 val_255 val_255 -255 val_255 val_255 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -273 val_273 val_273 -278 val_278 val_278 -278 val_278 val_278 -278 val_278 val_278 -278 val_278 val_278 -311 val_311 val_311 -311 val_311 val_311 -311 val_311 val_311 -311 val_311 val_311 -311 val_311 val_311 -311 val_311 val_311 -311 val_311 val_311 -311 val_311 val_311 -311 val_311 val_311 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -401 val_401 val_401 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -406 val_406 val_406 -66 val_66 val_66 -98 val_98 val_98 -98 val_98 val_98 -98 val_98 val_98 -98 val_98 val_98 -PREHOOK: query: EXPLAIN EXTENDED -INSERT OVERWRITE TABLE dest_j1 -SELECT x.key, z.value, y.value -FROM src w JOIN src1 x ON (x.value = w.value) -JOIN src y ON (x.key = y.key) -JOIN src1 z ON (x.key = z.key) -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED -INSERT OVERWRITE TABLE dest_j1 -SELECT x.key, z.value, y.value -FROM src w JOIN src1 x ON (x.value = w.value) -JOIN src y ON (x.key = y.key) -JOIN src1 z ON (x.key = z.key) -POSTHOOK: type: QUERY -ABSTRACT SYNTAX TREE: - -TOK_QUERY - TOK_FROM - TOK_JOIN - TOK_JOIN - TOK_JOIN - TOK_TABREF - TOK_TABNAME - src - w - TOK_TABREF - TOK_TABNAME - src1 - x - = - . - TOK_TABLE_OR_COL - x - value - . - TOK_TABLE_OR_COL - w - value - TOK_TABREF - TOK_TABNAME - src - y - = - . - TOK_TABLE_OR_COL - x - key - . - TOK_TABLE_OR_COL - y - key - TOK_TABREF - TOK_TABNAME - src1 - z - = - . - TOK_TABLE_OR_COL - x - key - . - TOK_TABLE_OR_COL - z - key - TOK_INSERT - TOK_DESTINATION - TOK_TAB - TOK_TABNAME - dest_j1 - TOK_SELECT - TOK_SELEXPR - . - TOK_TABLE_OR_COL - x - key - TOK_SELEXPR - . - TOK_TABLE_OR_COL - z - value - TOK_SELEXPR - . 
- TOK_TABLE_OR_COL - y - value - - -STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-3 depends on stages: Stage-4 - Stage-1 depends on stages: Stage-3 - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-4 + Stage: Stage-3 Spark + Edges: + Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 2), Map 8 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: - Map 3 + Map 4 Map Operator Tree: TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: y GatherStats: false Filter Operator isSamplingPred: false - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Position of Big Table: 1 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: src1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -660,14 +449,14 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe @@ -680,54 +469,47 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.src numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src1 - name: default.src1 + name: default.src + name: default.src Truncated Path -> Alias: - /src1 [x] - - Stage: Stage-3 - Spark -#### A masked pattern was here #### - Vertices: - Map 1 + /src [y] + Map 6 Map Operator Tree: TableScan - alias: w - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: z GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + predicate: ((11.0 = 11.0) and value is not null) (type: boolean) Select Operator - expressions: 
key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - Position of Big Table: 1 - Local Work: - Map Reduce Local Work + expressions: value (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + tag: 0 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -735,11 +517,13 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -749,66 +533,28 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [w] - Map 4 - Map Operator Tree: - TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 0 Map 3 - Position of Big Table: 1 - Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: + name: default.srcpart + name: default.srcpart #### A masked pattern was here #### Partition - base file name: src1 input format: org.apache.hadoop.mapred.TextInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -816,126 +562,89 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.srcpart numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 - numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src1 - name: default.src1 - Truncated Path -> Alias: - /src1 [x] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - alias: w - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: value is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col1, _col4 - input vertices: - 1 Map 4 - Position of Big Table: 0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col1, _col3, _col6 - input vertices: - 0 Map 1 - Position of Big Table: 1 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: string), _col6 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 + name: default.srcpart + name: default.srcpart #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 11 + properties: + COLUMN_STATS_ACCURATE 
true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value,val2 - columns.comments - columns.types string:string:string + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### - name default.dest_j1 - numFiles 1 - numRows 85 - rawDataSize 1600 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 1685 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Local Work: - Map Reduce Local Work - Path -> Alias: + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### - Path -> Partition: + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart #### A masked pattern was here #### Partition - base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 12 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -943,11 +652,13 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -957,79 +668,171 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked 
pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src [w] - - Stage: Stage-0 - Move Operator - tables: - replace: true + /srcpart/ds=2008-04-08/hr=11 [z] + /srcpart/ds=2008-04-08/hr=12 [z] + /srcpart/ds=2008-04-09/hr=11 [z] + /srcpart/ds=2008-04-09/hr=12 [z] + Map 8 + Map Operator Tree: + TableScan + alias: x + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (value is not null and key is not null) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + tag: 1 + value expressions: _col0 (type: string) + auto parallelism: false + Path -> Alias: #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value,val2 - columns.comments - columns.types string:string:string + Path -> Partition: #### A masked pattern was here #### - name default.dest_j1 - numFiles 1 - numRows 85 - rawDataSize 1600 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 1685 + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j1 - - Stage: Stage-2 - Stats-Aggr Operator + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 #### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src1 + name: default.src1 + Truncated Path -> Alias: + /src1 [x] + Reducer 5 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col1, _col2, _col5 + Select Operator + expressions: _col5 (type: string), _col2 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input 
format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j1 + serialization.ddl struct dest_j1 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 7 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col3 + Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Map-reduce partition columns: _col3 (type: string) + tag: 1 + value expressions: _col0 (type: string) + auto parallelism: false PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, z.value, y.value -FROM src w JOIN src1 x ON (x.value = w.value) -JOIN src y ON (x.key = y.key) -JOIN src1 z ON (x.key = z.key) +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Input: default@src1 +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Output: default@dest_j1 POSTHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, z.value, y.value -FROM src w JOIN src1 x ON (x.value = w.value) -JOIN src y ON (x.key = y.key) -JOIN src1 z ON (x.key = z.key) +FROM src1 x JOIN src y ON (x.key = y.key) +JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Output: default@dest_j1 -POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)w.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j1.value SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.value EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: select * from dest_j1 PREHOOK: type: QUERY PREHOOK: Input: default@dest_j1 @@ -1124,96 +927,79 @@ POSTHOOK: Input: default@dest_j1 98 val_98 val_98 98 val_98 val_98 PREHOOK: query: EXPLAIN EXTENDED -INSERT OVERWRITE TABLE dest_j2 -SELECT res.key, z.value, res.value -FROM (select x.key, x.value from src1 x JOIN src y ON (x.key = y.key)) res -JOIN srcpart z ON (res.value = z.value and z.ds='2008-04-08' and z.hr=11) +INSERT OVERWRITE TABLE dest_j1 +SELECT x.key, z.value, y.value +FROM src w JOIN src1 x ON (x.value = w.value) +JOIN src y ON (x.key = y.key) 
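
The infer_bucket_sort_convert_join.q.out change earlier in this patch replaces the second "Status: Failed" with the driver's fallback messages, which is the runtime side of the backup stages shown in these plans. A simplified sketch of that failure-driven retry, under the assumption of a task type exposing only execute() and getBackupTask() (illustrative interface and runner, not Hive's actual Driver/Task code):

// Models the transcript above: a non-zero return code from the primary task
// triggers one attempt per backup task in the chain.
interface BackupAware {
  int execute();          // non-zero return code signals failure
  BackupAware getBackupTask(); // null when no fallback plan was attached
}

final class FallbackRunner {
  static int run(BackupAware task) {
    int rc = task.execute();
    while (rc != 0 && task.getBackupTask() != null) {
      System.err.println("FAILED: Execution Error, return code " + rc
          + " from " + task.getClass().getName());
      task = task.getBackupTask();
      System.err.println("ATTEMPT: Execute BackupTask: " + task.getClass().getName());
      rc = task.execute();
    }
    return rc;
  }
}

Because the backup task is the unconverted common join, a query that fails in the map-join stage can still complete, which is why the partition now reports numFiles 2 and a non-zero totalSize in that file.
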
+JOIN src1 z ON (x.key = z.key) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN EXTENDED -INSERT OVERWRITE TABLE dest_j2 -SELECT res.key, z.value, res.value -FROM (select x.key, x.value from src1 x JOIN src y ON (x.key = y.key)) res -JOIN srcpart z ON (res.value = z.value and z.ds='2008-04-08' and z.hr=11) +INSERT OVERWRITE TABLE dest_j1 +SELECT x.key, z.value, y.value +FROM src w JOIN src1 x ON (x.value = w.value) +JOIN src y ON (x.key = y.key) +JOIN src1 z ON (x.key = z.key) POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: TOK_QUERY TOK_FROM TOK_JOIN - TOK_SUBQUERY - TOK_QUERY - TOK_FROM - TOK_JOIN - TOK_TABREF - TOK_TABNAME - src1 - x - TOK_TABREF - TOK_TABNAME - src - y - = - . - TOK_TABLE_OR_COL - x - key - . - TOK_TABLE_OR_COL - y - key - TOK_INSERT - TOK_DESTINATION - TOK_DIR - TOK_TMP_FILE - TOK_SELECT - TOK_SELEXPR - . - TOK_TABLE_OR_COL - x - key - TOK_SELEXPR - . - TOK_TABLE_OR_COL - x - value - res - TOK_TABREF - TOK_TABNAME - srcpart - z - and - and + TOK_JOIN + TOK_JOIN + TOK_TABREF + TOK_TABNAME + src + w + TOK_TABREF + TOK_TABNAME + src1 + x = . TOK_TABLE_OR_COL - res + x value . TOK_TABLE_OR_COL - z + w value - = - . - TOK_TABLE_OR_COL - z - ds - '2008-04-08' + TOK_TABREF + TOK_TABNAME + src + y = . TOK_TABLE_OR_COL - z - hr - 11 + x + key + . + TOK_TABLE_OR_COL + y + key + TOK_TABREF + TOK_TABNAME + src1 + z + = + . + TOK_TABLE_OR_COL + x + key + . + TOK_TABLE_OR_COL + z + key TOK_INSERT TOK_DESTINATION TOK_TAB TOK_TABNAME - dest_j2 + dest_j1 TOK_SELECT TOK_SELEXPR . TOK_TABLE_OR_COL - res + x key TOK_SELEXPR . @@ -1223,39 +1009,41 @@ TOK_QUERY TOK_SELEXPR . TOK_TABLE_OR_COL - res + y value STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-5 is a root stage has a backup stage: Stage-3 + Stage-4 depends on stages: Stage-5 has a backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-5 Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: z - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: ((11.0 = 11.0) and value is not null) (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: string) - 1 _col1 (type: string) + 1 _col0 (type: string) Position of Big Table: 1 Local Work: Map Reduce Local Work @@ -1264,12 +1052,9 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ 
-1277,40 +1062,116 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src1 numFiles 1 - numRows 500 - partition_columns ds/hr - partition_columns.types string:string - rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src1 + name: default.src1 Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [z] - Map 3 + /src1 [x] + + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: w + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) + Position of Big Table: 1 + Local Work: + Map Reduce Local Work + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 
5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [w] + Map 4 Map Operator Tree: TableScan alias: x @@ -1318,17 +1179,28 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - Position of Big Table: 0 + outputColumnNames: _col0, _col1, _col3 + input vertices: + 0 Map 3 + Position of Big Table: 1 + Statistics: Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) + Position of Big Table: 0 Local Work: Map Reduce Local Work Path -> Alias: @@ -1388,15 +1260,15 @@ STAGE PLANS: Map 2 Map Operator Tree: TableScan - alias: y + alias: w Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false - predicate: key is not null (type: boolean) + predicate: value is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) + expressions: value (type: string) outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -1404,57 +1276,58 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2 + 1 _col1 (type: string) + outputColumnNames: _col1, _col4 input vertices: - 1 Map 3 + 1 Map 4 Position of Big Table: 0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col3, _col4 - input vertices: - 0 Map 1 - Position of Big Table: 1 + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col1, _col3, _col6 + input vertices: + 0 Map 1 + Position of Big Table: 1 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: string), _col6 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col3 (type: string), _col0 (type: string), _col4 (type: string) - 
outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 + File Output Operator + compressed: false + GlobalTableId: 1 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + NumFilesPerFileSink: 1 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.comments - columns.types string:string:string + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:string #### A masked pattern was here #### - name default.dest_j2 - serialization.ddl struct dest_j2 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name default.dest_j1 + numFiles 1 + numRows 85 + rawDataSize 1600 + serialization.ddl struct dest_j1 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 1685 #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j2 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -1505,7 +1378,7 @@ STAGE PLANS: name: default.src name: default.src Truncated Path -> Alias: - /src [y] + /src [w] Stage: Stage-0 Move Operator @@ -1516,62 +1389,1334 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value,val2 columns.comments columns.types string:string:string #### A masked pattern was here #### - name default.dest_j2 - serialization.ddl struct dest_j2 { string key, string value, string val2} + name default.dest_j1 + numFiles 1 + numRows 85 + rawDataSize 1600 + serialization.ddl struct dest_j1 { string key, string value, string val2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 1685 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j2 + name: default.dest_j1 Stage: Stage-2 Stats-Aggr Operator #### A masked pattern was here #### -PREHOOK: query: INSERT OVERWRITE TABLE dest_j2 -SELECT res.key, z.value, res.value -FROM (select x.key, x.value from src1 x JOIN src y ON (x.key = y.key)) res -JOIN srcpart z ON (res.value = z.value and z.ds='2008-04-08' and z.hr=11) -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -PREHOOK: Input: default@srcpart -PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Output: default@dest_j2 -POSTHOOK: query: INSERT OVERWRITE TABLE dest_j2 -SELECT res.key, z.value, res.value -FROM (select x.key, x.value from src1 x JOIN 
src y ON (x.key = y.key)) res -JOIN srcpart z ON (res.value = z.value and z.ds='2008-04-08' and z.hr=11) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -POSTHOOK: Input: default@srcpart -POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Output: default@dest_j2 -POSTHOOK: Lineage: dest_j2.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j2.val2 SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest_j2.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: select * from dest_j2 -PREHOOK: type: QUERY -PREHOOK: Input: default@dest_j2 -#### A masked pattern was here #### -POSTHOOK: query: select * from dest_j2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@dest_j2 + Stage: Stage-3 + Spark + Edges: + Reducer 10 <- Map 11 (PARTITION-LEVEL SORT, 2), Map 9 (PARTITION-LEVEL SORT, 2) + Reducer 8 <- Map 7 (PARTITION-LEVEL SORT, 2), Reducer 10 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Reducer 8 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### -146 val_146 val_146 -146 val_146 val_146 -146 val_146 val_146 -146 val_146 val_146 -150 val_150 val_150 -213 val_213 val_213 -213 val_213 val_213 -213 val_213 val_213 + Vertices: + Map 11 + Map Operator Tree: + TableScan + alias: x + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src1 + name: default.src1 + Truncated Path -> Alias: + /src1 [x] + Map 5 + Map Operator Tree: + TableScan + alias: w + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Select Operator + expressions: key 
(type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [w] + Map 7 + Map Operator Tree: + TableScan + alias: w + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: value is not null (type: boolean) + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + tag: 0 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A 
masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [w] + Map 9 + Map Operator Tree: + TableScan + alias: x + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key is not null and value is not null) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src1 + name: default.src1 + Truncated Path -> Alias: + /src1 [x] + Reducer 10 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col3 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + tag: 1 + value expressions: _col0 (type: string), _col3 (type: string) + auto parallelism: false + Reducer 6 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col1, _col3, _col6 + Select Operator + expressions: _col3 (type: string), _col6 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j1 + numFiles 1 + numRows 85 + rawDataSize 1600 + serialization.ddl struct dest_j1 { string key, string value, string val2} + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 1685 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 8 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col1, _col4 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + tag: 1 + value expressions: _col4 (type: string) + auto parallelism: false + +PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 +SELECT x.key, z.value, y.value +FROM src w JOIN src1 x ON (x.value = w.value) +JOIN src y ON (x.key = y.key) +JOIN src1 z ON (x.key = z.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Output: default@dest_j1 +POSTHOOK: query: INSERT OVERWRITE TABLE dest_j1 +SELECT x.key, z.value, y.value +FROM src w JOIN src1 x ON (x.value = w.value) +JOIN src y ON (x.key = y.key) +JOIN src1 z ON (x.key = z.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@dest_j1 +POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src1)x.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)w.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j1.value SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select * from dest_j1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_j1 +#### A masked pattern was here #### +POSTHOOK: query: select * from dest_j1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_j1 +#### A masked pattern was here #### +146 val_146 val_146 +146 val_146 val_146 +146 val_146 val_146 +146 val_146 val_146 +150 val_150 val_150 +213 val_213 val_213 +213 val_213 val_213 +213 val_213 val_213 +213 val_213 val_213 +238 val_238 val_238 +238 val_238 val_238 +238 val_238 val_238 +238 val_238 val_238 +255 val_255 val_255 +255 val_255 val_255 +255 val_255 val_255 +255 val_255 val_255 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +273 val_273 val_273 +278 val_278 val_278 +278 val_278 val_278 +278 val_278 val_278 +278 val_278 val_278 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +311 val_311 val_311 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +401 val_401 val_401 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +406 val_406 val_406 +66 val_66 val_66 +98 val_98 val_98 +98 val_98 val_98 +98 val_98 
val_98 +98 val_98 val_98 +PREHOOK: query: EXPLAIN EXTENDED +INSERT OVERWRITE TABLE dest_j2 +SELECT res.key, z.value, res.value +FROM (select x.key, x.value from src1 x JOIN src y ON (x.key = y.key)) res +JOIN srcpart z ON (res.value = z.value and z.ds='2008-04-08' and z.hr=11) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN EXTENDED +INSERT OVERWRITE TABLE dest_j2 +SELECT res.key, z.value, res.value +FROM (select x.key, x.value from src1 x JOIN src y ON (x.key = y.key)) res +JOIN srcpart z ON (res.value = z.value and z.ds='2008-04-08' and z.hr=11) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + src1 + x + TOK_TABREF + TOK_TABNAME + src + y + = + . + TOK_TABLE_OR_COL + x + key + . + TOK_TABLE_OR_COL + y + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + x + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + x + value + res + TOK_TABREF + TOK_TABNAME + srcpart + z + and + and + = + . + TOK_TABLE_OR_COL + res + value + . + TOK_TABLE_OR_COL + z + value + = + . + TOK_TABLE_OR_COL + z + ds + '2008-04-08' + = + . + TOK_TABLE_OR_COL + z + hr + 11 + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + dest_j2 + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + res + key + TOK_SELEXPR + . + TOK_TABLE_OR_COL + z + value + TOK_SELEXPR + . + TOK_TABLE_OR_COL + res + value + + +STAGE DEPENDENCIES: + Stage-4 is a root stage has a backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: z + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((11.0 = 11.0) and value is not null) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) + Position of Big Table: 1 + Local Work: + Map Reduce Local Work + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + 
columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [z] + Map 3 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src1 + name: default.src1 + Truncated Path -> Alias: + /src1 [x] + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + input vertices: + 1 Map 3 + Position of Big 
Table: 0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col3, _col4 + input vertices: + 0 Map 1 + Position of Big Table: 1 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: string), _col0 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j2 + serialization.ddl struct dest_j2 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j2 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Local Work: + Map Reduce Local Work + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [y] + + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + 
bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j2 + serialization.ddl struct dest_j2 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j2 + + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + + Stage: Stage-3 + Spark + Edges: + Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 2), Map 8 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: z + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((11.0 = 11.0) and value is not null) (type: boolean) + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + tag: 0 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + 
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 12 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + 
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [z] + /srcpart/ds=2008-04-08/hr=12 [z] + /srcpart/ds=2008-04-09/hr=11 [z] + /srcpart/ds=2008-04-09/hr=12 [z] + Map 6 + Map Operator Tree: + TableScan + alias: y + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + tag: 0 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [y] + Map 8 + Map Operator Tree: + TableScan + alias: x + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key is not null and value is not null) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src1 + name: default.src1 + Truncated Path -> Alias: + /src1 [x] + Reducer 5 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col3, _col4 + Select Operator + expressions: _col3 (type: string), _col0 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j2 + serialization.ddl struct dest_j2 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j2 + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 7 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col1, _col2 + Select Operator + expressions: _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + tag: 1 + value expressions: _col0 (type: string) + auto parallelism: false + +PREHOOK: query: INSERT OVERWRITE TABLE dest_j2 +SELECT res.key, z.value, res.value +FROM (select x.key, x.value from src1 x JOIN src y ON (x.key = y.key)) res +JOIN srcpart z ON (res.value = z.value and z.ds='2008-04-08' and z.hr=11) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@dest_j2 +POSTHOOK: query: INSERT OVERWRITE TABLE dest_j2 +SELECT res.key, z.value, res.value +FROM (select x.key, x.value from src1 x JOIN src y ON (x.key = y.key)) res +JOIN srcpart z ON (res.value = z.value and z.ds='2008-04-08' and z.hr=11) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: 
Output: default@dest_j2 +POSTHOOK: Lineage: dest_j2.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j2.val2 SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_j2.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select * from dest_j2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_j2 +#### A masked pattern was here #### +POSTHOOK: query: select * from dest_j2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_j2 +#### A masked pattern was here #### +146 val_146 val_146 +146 val_146 val_146 +146 val_146 val_146 +146 val_146 val_146 +150 val_150 val_150 +213 val_213 val_213 +213 val_213 val_213 +213 val_213 val_213 213 val_213 val_213 238 val_238 val_238 238 val_238 val_238 @@ -1753,40 +2898,345 @@ TOK_QUERY value -STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 +STAGE DEPENDENCIES: + Stage-4 is a root stage has a backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Position of Big Table: 1 + Local Work: + Map Reduce Local Work + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [y] + Map 3 + Map Operator Tree: + TableScan + alias: z + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: 
false + predicate: ((11.0 = 11.0) and value is not null) (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + Position of Big Table: 0 + Local Work: + Map Reduce Local Work + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [z] + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: value is not null (type: boolean) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + input vertices: + 0 Map 1 + Position of Big Table: 1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 3 + Position of Big Table: 0 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Select Operator + 
expressions: _col0 (type: string), _col2 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j2 + numFiles 1 + numRows 85 + rawDataSize 1600 + serialization.ddl struct dest_j2 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 1685 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j2 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Local Work: + Map Reduce Local Work + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: src1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src1 + name: default.src1 + Truncated Path -> Alias: + /src1 [x] + + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j2 + numFiles 1 + numRows 85 + rawDataSize 1600 + serialization.ddl struct dest_j2 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 1685 +#### A masked pattern was here #### + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j2 + + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### -STAGE PLANS: Stage: Stage-3 Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 8 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: y - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Position of Big Table: 1 - Local Work: - Map Reduce Local Work + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + tag: 0 + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -1810,57 +3260,208 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [y] + Map 7 + Map Operator Tree: + TableScan + alias: x + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: value is not null (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + tag: 1 + value expressions: _col1 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true 
+ bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src1 + name: default.src1 + Truncated Path -> Alias: + /src1 [x] + Map 8 + Map Operator Tree: + TableScan + alias: z + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((11.0 = 11.0) and value is not null) (type: boolean) + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + tag: 1 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: bucket_count 
-1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [y] - Map 3 - Map Operator Tree: - TableScan - alias: z - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: ((11.0 = 11.0) and value is not null) (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - Position of Big Table: 0 - Local Work: - Map Reduce Local Work - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: + name: default.srcpart + name: default.srcpart #### A masked pattern was here #### Partition - base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: - ds 2008-04-08 + ds 2008-04-09 hr 11 properties: COLUMN_STATS_ACCURATE true @@ -1900,98 +3501,13 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcpart name: default.srcpart - Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [z] - - Stage: Stage-1 - Spark -#### A masked pattern was here #### - Vertices: - Map 2 - Map Operator Tree: - TableScan - alias: x - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: value is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Right Outer Join0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col2 - input vertices: - 0 Map 1 - Position of Big Table: 1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - input vertices: - 1 Map 3 - Position of Big Table: 0 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE 
Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value,val2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j2 - numFiles 1 - numRows 85 - rawDataSize 1600 - serialization.ddl struct dest_j2 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 1685 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j2 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Local Work: - Map Reduce Local Work - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: #### A masked pattern was here #### Partition - base file name: src1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 12 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -1999,71 +3515,99 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 + name default.srcpart numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src1 - numFiles 1 - numRows 25 - rawDataSize 191 - serialization.ddl struct src1 { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 216 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src1 - name: default.src1 + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src1 [x] - - Stage: Stage-0 - Move Operator - tables: - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE true - bucket_count -1 - columns key,value,val2 - columns.comments - columns.types string:string:string -#### A masked pattern was here #### - name default.dest_j2 - numFiles 1 - numRows 85 - rawDataSize 1600 - serialization.ddl struct dest_j2 { string key, string 
value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 1685 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_j2 - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### + /srcpart/ds=2008-04-08/hr=11 [z] + /srcpart/ds=2008-04-08/hr=12 [z] + /srcpart/ds=2008-04-09/hr=11 [z] + /srcpart/ds=2008-04-09/hr=12 [z] + Reducer 5 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + outputColumnNames: _col1, _col2 + Select Operator + expressions: _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + tag: 0 + value expressions: _col0 (type: string) + auto parallelism: false + Reducer 6 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col0 (type: string), _col2 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j2 + numFiles 1 + numRows 85 + rawDataSize 1600 + serialization.ddl struct dest_j2 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 1685 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j2 + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: INSERT OVERWRITE TABLE dest_j2 SELECT res.key, z.value, res.value @@ -2074,6 +3618,9 @@ PREHOOK: Input: default@src PREHOOK: Input: default@src1 PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Output: default@dest_j2 POSTHOOK: query: INSERT OVERWRITE TABLE dest_j2 SELECT res.key, z.value, res.value @@ -2084,6 +3631,9 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Output: default@dest_j2 POSTHOOK: Lineage: dest_j2.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest_j2.val2 SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ] @@ -2206,13 +3756,14 @@ FROM (select x.key, x.value from src1 x JOIN src y ON (x.key = y.key)) res JOIN srcpart x ON (res.value = x.value and x.ds='2008-04-08' and x.hr=11) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-4 is a root stage has a 
backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -2321,6 +3872,84 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Spark + Edges: + Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 2), Map 8 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: x + Filter Operator + predicate: ((11.0 = 11.0) and value is not null) (type: boolean) + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: y + Filter Operator + predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Map 8 + Map Operator Tree: + TableScan + alias: x + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col3, _col4 + Select Operator + expressions: _col3 (type: string), _col0 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j2 + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col1, _col2 + Select Operator + expressions: _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + value expressions: _col0 (type: string) + PREHOOK: query: INSERT OVERWRITE TABLE dest_j2 SELECT res.key, x.value, res.value FROM (select x.key, x.value from src1 x JOIN src y ON (x.key = y.key)) res @@ -2330,6 +3959,9 @@ PREHOOK: Input: default@src PREHOOK: Input: default@src1 PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Output: default@dest_j2 POSTHOOK: query: INSERT OVERWRITE TABLE dest_j2 SELECT res.key, x.value, res.value @@ -2340,6 +3972,9 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: 
default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Output: default@dest_j2 POSTHOOK: Lineage: dest_j2.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest_j2.val2 SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ] @@ -2450,13 +4085,14 @@ FROM (select x.key, x.value from src1 x JOIN src y ON (x.key = y.key)) res JOIN srcpart y ON (res.value = y.value and y.ds='2008-04-08' and y.hr=11) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-4 is a root stage has a backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -2565,6 +4201,84 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Spark + Edges: + Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 2), Map 8 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: y + Filter Operator + predicate: ((11.0 = 11.0) and value is not null) (type: boolean) + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: y + Filter Operator + predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Map 8 + Map Operator Tree: + TableScan + alias: x + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col3, _col4 + Select Operator + expressions: _col3 (type: string), _col0 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j2 + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col1, _col2 + Select Operator + expressions: _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + value expressions: _col0 (type: string) + PREHOOK: query: INSERT OVERWRITE TABLE dest_j2 SELECT res.key, y.value, res.value FROM (select x.key, x.value from src1 x JOIN src y ON (x.key = y.key)) res @@ -2574,6 +4288,9 @@ PREHOOK: Input: default@src PREHOOK: Input: default@src1 PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: 
Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Output: default@dest_j2 POSTHOOK: query: INSERT OVERWRITE TABLE dest_j2 SELECT res.key, y.value, res.value @@ -2584,6 +4301,9 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Output: default@dest_j2 POSTHOOK: Lineage: dest_j2.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest_j2.val2 SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ] diff --git ql/src/test/results/clientpositive/spark/join33.q.out ql/src/test/results/clientpositive/spark/join33.q.out index a9d50b4..a831bac 100644 --- ql/src/test/results/clientpositive/spark/join33.q.out +++ ql/src/test/results/clientpositive/spark/join33.q.out @@ -100,13 +100,14 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 + Stage-4 is a root stage has a backup stage: Stage-3 + Stage-1 depends on stages: Stage-4 has a backup stage: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-4 Spark #### A masked pattern was here #### Vertices: @@ -401,6 +402,400 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Spark + Edges: + Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 2), Map 8 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: y + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} 
+ serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [y] + Map 6 + Map Operator Tree: + TableScan + alias: z + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((11.0 = 11.0) and value is not null) (type: boolean) + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + tag: 0 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, 
string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 12 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [z] + /srcpart/ds=2008-04-08/hr=12 [z] + /srcpart/ds=2008-04-09/hr=11 [z] + /srcpart/ds=2008-04-09/hr=12 [z] + Map 8 + Map Operator Tree: + TableScan + alias: x + GatherStats: false + Filter Operator + isSamplingPred: false + 
predicate: (value is not null and key is not null) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + tag: 1 + value expressions: _col0 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src1 + name: default.src1 + Truncated Path -> Alias: + /src1 [x] + Reducer 5 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col1, _col2, _col5 + Select Operator + expressions: _col5 (type: string), _col2 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value,val2 + columns.comments + columns.types string:string:string +#### A masked pattern was here #### + name default.dest_j1 + serialization.ddl struct dest_j1 { string key, string value, string val2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_j1 + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 7 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col3 + Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Map-reduce partition columns: _col3 (type: string) + tag: 1 + value expressions: _col0 (type: string) + auto parallelism: false + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, z.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) @@ -410,6 +805,9 @@ PREHOOK: Input: default@src PREHOOK: 
Input: default@src1 PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Output: default@dest_j1 POSTHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, z.value, y.value @@ -420,6 +818,9 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Output: default@dest_j1 POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] diff --git ql/src/test/results/clientpositive/spark/join_reorder4.q.out ql/src/test/results/clientpositive/spark/join_reorder4.q.out index 5cc30f7..79f91d5 100644 --- ql/src/test/results/clientpositive/spark/join_reorder4.q.out +++ ql/src/test/results/clientpositive/spark/join_reorder4.q.out @@ -51,12 +51,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+ STREAMTABLE(a) */ a.*, b.*, c.* from T1 a join T2 b on a.key1=b.key2 join T3 c on a.key1=c.key3 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -130,6 +131,62 @@ STAGE PLANS: Local Work: Map Reduce Local Work + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: key1 is not null (type: boolean) + Reduce Output Operator + key expressions: key1 (type: string) + sort order: + + Map-reduce partition columns: key1 (type: string) + value expressions: val1 (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: b + Filter Operator + predicate: key2 is not null (type: boolean) + Reduce Output Operator + key expressions: key2 (type: string) + sort order: + + Map-reduce partition columns: key2 (type: string) + value expressions: val2 (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: c + Filter Operator + predicate: key3 is not null (type: boolean) + Reduce Output Operator + key expressions: key3 (type: string) + sort order: + + Map-reduce partition columns: key3 (type: string) + value expressions: val3 (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 
Stage: Stage-0 Fetch Operator limit: -1 @@ -154,12 +211,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+ STREAMTABLE(b) */ a.*, b.*, c.* from T1 a join T2 b on a.key1=b.key2 join T3 c on a.key1=c.key3 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -233,6 +291,62 @@ STAGE PLANS: Local Work: Map Reduce Local Work + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: key1 is not null (type: boolean) + Reduce Output Operator + key expressions: key1 (type: string) + sort order: + + Map-reduce partition columns: key1 (type: string) + value expressions: val1 (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: b + Filter Operator + predicate: key2 is not null (type: boolean) + Reduce Output Operator + key expressions: key2 (type: string) + sort order: + + Map-reduce partition columns: key2 (type: string) + value expressions: val2 (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: c + Filter Operator + predicate: key3 is not null (type: boolean) + Reduce Output Operator + key expressions: key3 (type: string) + sort order: + + Map-reduce partition columns: key3 (type: string) + value expressions: val3 (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -257,12 +371,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select /*+ STREAMTABLE(c) */ a.*, b.*, c.* from T1 a join T2 b on a.key1=b.key2 join T3 c on a.key1=c.key3 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -336,6 +451,62 @@ STAGE PLANS: Local Work: Map Reduce Local Work + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: key1 is not null (type: boolean) + Reduce Output Operator + key expressions: key1 (type: string) + sort order: + + Map-reduce partition columns: key1 (type: string) + value expressions: val1 (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: b + 
Filter Operator + predicate: key2 is not null (type: boolean) + Reduce Output Operator + key expressions: key2 (type: string) + sort order: + + Map-reduce partition columns: key2 (type: string) + value expressions: val2 (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: c + Filter Operator + predicate: key3 is not null (type: boolean) + Reduce Output Operator + key expressions: key3 (type: string) + sort order: + + Map-reduce partition columns: key3 (type: string) + value expressions: val3 (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/join_star.q.out ql/src/test/results/clientpositive/spark/join_star.q.out index 69c2fd7..1a4ecd4 100644 --- ql/src/test/results/clientpositive/spark/join_star.q.out +++ ql/src/test/results/clientpositive/spark/join_star.q.out @@ -131,12 +131,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select m1, m2, f2 from fact join dim1 on fact.d1=dim1.f1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -191,6 +192,50 @@ STAGE PLANS: Local Work: Map Reduce Local Work + Stage: Stage-2 + Spark + Edges: + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: fact + Filter Operator + predicate: d1 is not null (type: boolean) + Reduce Output Operator + key expressions: d1 (type: int) + sort order: + + Map-reduce partition columns: d1 (type: int) + value expressions: m1 (type: int), m2 (type: int) + Map 5 + Map Operator Tree: + TableScan + alias: dim1 + Filter Operator + predicate: f1 is not null (type: boolean) + Reduce Output Operator + key expressions: f1 (type: int) + sort order: + + Map-reduce partition columns: f1 (type: int) + value expressions: f2 (type: int) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col8 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -222,12 +267,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select m1, m2, f2, f4 from fact join dim1 on fact.d1=dim1.f1 join dim2 on fact.d2=dim2.f3 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - 
Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -306,6 +352,73 @@ STAGE PLANS: Local Work: Map Reduce Local Work + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 8 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: fact + Filter Operator + predicate: (d1 is not null and d2 is not null) (type: boolean) + Reduce Output Operator + key expressions: d1 (type: int) + sort order: + + Map-reduce partition columns: d1 (type: int) + value expressions: m1 (type: int), m2 (type: int), d2 (type: int) + Map 7 + Map Operator Tree: + TableScan + alias: dim1 + Filter Operator + predicate: f1 is not null (type: boolean) + Reduce Output Operator + key expressions: f1 (type: int) + sort order: + + Map-reduce partition columns: f1 (type: int) + value expressions: f2 (type: int) + Map 8 + Map Operator Tree: + TableScan + alias: dim2 + Filter Operator + predicate: f3 is not null (type: boolean) + Reduce Output Operator + key expressions: f3 (type: int) + sort order: + + Map-reduce partition columns: f3 (type: int) + value expressions: f4 (type: int) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col3, _col8 + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + value expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col8, _col13 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int), _col13 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -339,12 +452,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select m1, m2, f2, f4 from fact join dim1 on fact.d1= dim1.f1 join dim2 on dim1.f2 = dim2.f3 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -423,6 +537,73 @@ STAGE PLANS: Local Work: Map Reduce Local Work + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 8 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: fact + Filter Operator + predicate: d1 is not null (type: boolean) + Reduce Output Operator + key expressions: d1 (type: int) + sort order: + + Map-reduce partition columns: d1 (type: int) + value expressions: m1 (type: int), m2 (type: int) + 
Map 7 + Map Operator Tree: + TableScan + alias: dim1 + Filter Operator + predicate: (f1 is not null and f2 is not null) (type: boolean) + Reduce Output Operator + key expressions: f1 (type: int) + sort order: + + Map-reduce partition columns: f1 (type: int) + value expressions: f2 (type: int) + Map 8 + Map Operator Tree: + TableScan + alias: dim2 + Filter Operator + predicate: f3 is not null (type: boolean) + Reduce Output Operator + key expressions: f3 (type: int) + sort order: + + Map-reduce partition columns: f3 (type: int) + value expressions: f4 (type: int) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col8 + Reduce Output Operator + key expressions: _col8 (type: int) + sort order: + + Map-reduce partition columns: _col8 (type: int) + value expressions: _col0 (type: int), _col1 (type: int) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col8, _col13 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int), _col13 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -456,12 +637,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select m1, m2, f2, f4 from fact Left outer join dim1 on fact.d1= dim1.f1 Left outer join dim2 on dim1.f2 = dim2.f3 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -531,6 +713,67 @@ STAGE PLANS: Local Work: Map Reduce Local Work + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 8 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: fact + Reduce Output Operator + key expressions: d1 (type: int) + sort order: + + Map-reduce partition columns: d1 (type: int) + value expressions: m1 (type: int), m2 (type: int) + Map 7 + Map Operator Tree: + TableScan + alias: dim1 + Reduce Output Operator + key expressions: f1 (type: int) + sort order: + + Map-reduce partition columns: f1 (type: int) + value expressions: f2 (type: int) + Map 8 + Map Operator Tree: + TableScan + alias: dim2 + Reduce Output Operator + key expressions: f3 (type: int) + sort order: + + Map-reduce partition columns: f3 (type: int) + value expressions: f4 (type: int) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + outputColumnNames: _col0, _col1, _col8 + Reduce Output Operator + key expressions: _col8 (type: int) + sort order: + + Map-reduce partition columns: _col8 (type: int) + value expressions: _col0 (type: int), _col1 (type: int) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + outputColumnNames: _col0, _col1, _col8, _col13 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int), _col13 (type: 
int) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -580,12 +823,13 @@ POSTHOOK: query: explain Select m1, m2, f2, f4, f6, f8, f10, f12, f14 Left outer Join dim7 on dim6.f12 = dim7.f13 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -755,6 +999,161 @@ STAGE PLANS: Local Work: Map Reduce Local Work + Stage: Stage-2 + Spark + Edges: + Reducer 10 <- Map 16 (PARTITION-LEVEL SORT, 2), Map 9 (PARTITION-LEVEL SORT, 2) + Reducer 11 <- Map 17 (PARTITION-LEVEL SORT, 2), Reducer 10 (PARTITION-LEVEL SORT, 2) + Reducer 12 <- Map 18 (PARTITION-LEVEL SORT, 2), Reducer 11 (PARTITION-LEVEL SORT, 2) + Reducer 13 <- Map 19 (PARTITION-LEVEL SORT, 2), Map 21 (PARTITION-LEVEL SORT, 2), Reducer 12 (PARTITION-LEVEL SORT, 2) + Reducer 14 <- Map 20 (PARTITION-LEVEL SORT, 2), Reducer 13 (PARTITION-LEVEL SORT, 2) + Reducer 15 <- Map 22 (PARTITION-LEVEL SORT, 2), Reducer 14 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 16 + Map Operator Tree: + TableScan + alias: dim1 + Reduce Output Operator + key expressions: f1 (type: int) + sort order: + + Map-reduce partition columns: f1 (type: int) + value expressions: f2 (type: int) + Map 17 + Map Operator Tree: + TableScan + alias: dim2 + Reduce Output Operator + key expressions: f3 (type: int) + sort order: + + Map-reduce partition columns: f3 (type: int) + value expressions: f4 (type: int) + Map 18 + Map Operator Tree: + TableScan + alias: dim3 + Reduce Output Operator + key expressions: f5 (type: int) + sort order: + + Map-reduce partition columns: f5 (type: int) + value expressions: f6 (type: int) + Map 19 + Map Operator Tree: + TableScan + alias: dim4 + Reduce Output Operator + key expressions: f7 (type: int) + sort order: + + Map-reduce partition columns: f7 (type: int) + value expressions: f8 (type: int) + Map 20 + Map Operator Tree: + TableScan + alias: dim5 + Reduce Output Operator + key expressions: f9 (type: int) + sort order: + + Map-reduce partition columns: f9 (type: int) + value expressions: f10 (type: int) + Map 21 + Map Operator Tree: + TableScan + alias: dim6 + Reduce Output Operator + key expressions: f11 (type: int) + sort order: + + Map-reduce partition columns: f11 (type: int) + value expressions: f12 (type: int) + Map 22 + Map Operator Tree: + TableScan + alias: dim7 + Reduce Output Operator + key expressions: f13 (type: int) + sort order: + + Map-reduce partition columns: f13 (type: int) + value expressions: f14 (type: int) + Map 9 + Map Operator Tree: + TableScan + alias: fact + Reduce Output Operator + key expressions: d1 (type: int) + sort order: + + Map-reduce partition columns: d1 (type: int) + value expressions: m1 (type: int), m2 (type: int), d2 (type: int) + Reducer 10 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + outputColumnNames: _col0, _col1, _col3, _col8 + Reduce Output Operator + key expressions: _col8 (type: int) + sort order: + + Map-reduce partition columns: _col8 
(type: int) + value expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int) + Reducer 11 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + outputColumnNames: _col0, _col1, _col3, _col8, _col13 + Reduce Output Operator + key expressions: _col3 (type: int) + sort order: + + Map-reduce partition columns: _col3 (type: int) + value expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int), _col13 (type: int) + Reducer 12 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + outputColumnNames: _col0, _col1, _col8, _col13, _col18 + Reduce Output Operator + key expressions: _col18 (type: int) + sort order: + + Map-reduce partition columns: _col18 (type: int) + value expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int), _col13 (type: int) + Reducer 13 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Left Outer Join0 to 2 + outputColumnNames: _col0, _col1, _col8, _col13, _col18, _col23, _col28 + Reduce Output Operator + key expressions: _col23 (type: int) + sort order: + + Map-reduce partition columns: _col23 (type: int) + value expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int), _col13 (type: int), _col18 (type: int), _col28 (type: int) + Reducer 14 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + outputColumnNames: _col0, _col1, _col8, _col13, _col18, _col23, _col28, _col33 + Reduce Output Operator + key expressions: _col28 (type: int) + sort order: + + Map-reduce partition columns: _col28 (type: int) + value expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int), _col13 (type: int), _col18 (type: int), _col23 (type: int), _col33 (type: int) + Reducer 15 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + outputColumnNames: _col0, _col1, _col8, _col13, _col18, _col23, _col28, _col33, _col38 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col8 (type: int), _col13 (type: int), _col18 (type: int), _col23 (type: int), _col33 (type: int), _col28 (type: int), _col38 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/mapjoin_decimal.q.out ql/src/test/results/clientpositive/spark/mapjoin_decimal.q.out index b681e5f..1653203 100644 --- ql/src/test/results/clientpositive/spark/mapjoin_decimal.q.out +++ ql/src/test/results/clientpositive/spark/mapjoin_decimal.q.out @@ -83,12 +83,13 @@ POSTHOOK: query: explain select t1.dec, t2.dec from t1 join t2 on (t1.dec=t2.dec) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -143,6 +144,48 @@ STAGE PLANS: Local Work: Map Reduce Local Work + Stage: Stage-2 + Spark + Edges: + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: 
+ TableScan + alias: t1 + Filter Operator + predicate: dec is not null (type: boolean) + Reduce Output Operator + key expressions: dec (type: decimal(4,2)) + sort order: + + Map-reduce partition columns: dec (type: decimal(4,2)) + Map 5 + Map Operator Tree: + TableScan + alias: t2 + Filter Operator + predicate: dec is not null (type: boolean) + Reduce Output Operator + key expressions: dec (type: decimal(4,0)) + sort order: + + Map-reduce partition columns: dec (type: decimal(4,0)) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col4 + Select Operator + expressions: _col0 (type: decimal(4,2)), _col4 (type: decimal(4,0)) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/mapjoin_filter_on_outerjoin.q.out ql/src/test/results/clientpositive/spark/mapjoin_filter_on_outerjoin.q.out index 0271f97..bb2f7cd 100644 --- ql/src/test/results/clientpositive/spark/mapjoin_filter_on_outerjoin.q.out +++ ql/src/test/results/clientpositive/spark/mapjoin_filter_on_outerjoin.q.out @@ -218,12 +218,13 @@ SELECT * FROM src1 SORT BY src1.key, src2.key, src3.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -322,6 +323,76 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 8 (PARTITION-LEVEL SORT, 2), Map 9 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Reducer 6 (SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 5 + Map Operator Tree: + TableScan + alias: src1 + Filter Operator + predicate: ((key < 300) and (key < 10)) (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Map 8 + Map Operator Tree: + TableScan + alias: src2 + Filter Operator + predicate: (key < 300) (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Map 9 + Map Operator Tree: + TableScan + alias: src3 + Filter Operator + predicate: (key < 300) (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Inner Join 1 to 2 + filter predicates: + 0 + 1 {(KEY.reducesinkkey0 > 10)} + 2 + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + 
Reduce Output Operator + key expressions: _col0 (type: string), _col2 (type: string), _col4 (type: string) + sort order: +++ + value expressions: _col1 (type: string), _col3 (type: string), _col5 (type: string) + Reducer 7 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/mapjoin_hook.q.out ql/src/test/results/clientpositive/spark/mapjoin_hook.q.out index 7aa8ce9..a972047 100644 --- ql/src/test/results/clientpositive/spark/mapjoin_hook.q.out +++ ql/src/test/results/clientpositive/spark/mapjoin_hook.q.out @@ -23,7 +23,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@dest1 [MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 0 HINTED_MAPJOIN_LOCAL: 0 CONVERTED_MAPJOIN: 0 CONVERTED_MAPJOIN_LOCAL: 0 BACKUP_COMMON_JOIN: 0 -RUN: Stage-3:MAPRED +RUN: Stage-4:MAPRED RUN: Stage-1:MAPRED RUN: Stage-0:MOVE RUN: Stage-2:STATS @@ -39,10 +39,11 @@ PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Output: default@dest1 Status: Failed -Status: Failed +FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.spark.SparkTask +ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.spark.SparkTask [MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 0 HINTED_MAPJOIN_LOCAL: 0 CONVERTED_MAPJOIN: 0 CONVERTED_MAPJOIN_LOCAL: 0 BACKUP_COMMON_JOIN: 0 +RUN: Stage-4:MAPRED RUN: Stage-3:MAPRED -RUN: Stage-1:MAPRED RUN: Stage-0:MOVE RUN: Stage-2:STATS PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key) @@ -51,11 +52,10 @@ PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@dest1 Status: Failed -Status: Failed -Status: Failed +FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.spark.SparkTask +ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.spark.SparkTask [MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 0 HINTED_MAPJOIN_LOCAL: 0 CONVERTED_MAPJOIN: 0 CONVERTED_MAPJOIN_LOCAL: 0 BACKUP_COMMON_JOIN: 0 -RUN: Stage-4:MAPRED +RUN: Stage-5:MAPRED RUN: Stage-3:MAPRED -RUN: Stage-1:MAPRED RUN: Stage-0:MOVE RUN: Stage-2:STATS diff --git ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out index 65a7d06..fde392d 100644 --- ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out +++ ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out @@ -56,12 +56,13 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -466,6 +467,394 @@ STAGE PLANS: /srcpart/ds=2008-04-09/hr=11 [srcpart] /srcpart/ds=2008-04-09/hr=12 
[srcpart] + Stage: Stage-2 + Spark + Edges: + Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 2), Map 8 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: src + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: value is not null (type: boolean) + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + tag: 0 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src + name: default.src + Truncated Path -> Alias: + /src [src] + Map 6 + Map Operator Tree: + TableScan + alias: srcpart + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key is not null and value is not null) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + tag: 0 + value expressions: _col1 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 12 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [srcpart] + /srcpart/ds=2008-04-08/hr=12 [srcpart] + /srcpart/ds=2008-04-09/hr=11 [srcpart] + /srcpart/ds=2008-04-09/hr=12 [srcpart] + Map 8 + Map Operator Tree: + TableScan + alias: src1 + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + tag: 1 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.src1 + numFiles 1 + numRows 25 + rawDataSize 191 + serialization.ddl struct src1 { string key, string value} + serialization.format 1 + 
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 216 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src1 + name: default.src1 + Truncated Path -> Alias: + /src1 [src1] + Reducer 5 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col1 + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 7 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + tag: 1 + value expressions: _col0 (type: string) + auto parallelism: false + Stage: Stage-0 Fetch Operator limit: -1 @@ -479,12 +868,13 @@ POSTHOOK: query: explain select srcpart.key from srcpart join src on (srcpart.value=src.value) join src1 on (srcpart.key=src1.key) where srcpart.value > 'val_450' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -575,6 +965,80 @@ STAGE PLANS: Local Work: Map Reduce Local Work + Stage: Stage-2 + Spark + Edges: + Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 2), Map 8 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: src + Filter Operator + predicate: (value > 'val_450') (type: boolean) + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: srcpart + Filter Operator + predicate: (((value > 'val_450') and key is not null) and value is not null) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: string) + Map 8 + Map Operator Tree: + TableScan + alias: src1 + Filter Operator + predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reducer 5 + Reduce Operator Tree: + Join 
Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col1 + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + value expressions: _col0 (type: string) + Stage: Stage-0 Fetch Operator limit: -1 @@ -588,12 +1052,13 @@ POSTHOOK: query: explain select count(*) from srcpart join src on (srcpart.value=src.value) join src src1 on (srcpart.key=src1.key) group by ds POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -710,6 +1175,101 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 9 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Map 10 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 8 <- Reducer 7 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 10 + Map Operator Tree: + TableScan + alias: src + Filter Operator + predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: srcpart + Filter Operator + predicate: (value is not null and key is not null) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string), ds (type: string) + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + value expressions: _col0 (type: string), _col2 (type: string) + Map 9 + Map Operator Tree: + TableScan + alias: src + Filter Operator + predicate: value is not null (type: boolean) + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col2 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col2 (type: string) + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col2 + Select Operator + expressions: _col2 (type: string) + outputColumnNames: _col0 + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + 
sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: bigint) + Reducer 8 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/mapjoin_memcheck.q.out ql/src/test/results/clientpositive/spark/mapjoin_memcheck.q.out index 14f316c..1148bdc 100644 --- ql/src/test/results/clientpositive/spark/mapjoin_memcheck.q.out +++ ql/src/test/results/clientpositive/spark/mapjoin_memcheck.q.out @@ -29,12 +29,13 @@ select src1.key as k1, src1.value as v1, src2.key, src2.value from src0 src1 inner join src0 src2 on src1.key = src2.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -89,6 +90,50 @@ STAGE PLANS: Local Work: Map Reduce Local Work + Stage: Stage-2 + Spark + Edges: + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: src1 + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Map 5 + Map Operator Tree: + TableScan + alias: src2 + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/mapjoin_subquery.q.out ql/src/test/results/clientpositive/spark/mapjoin_subquery.q.out index 2d1e7a7..86a8a91 100644 --- ql/src/test/results/clientpositive/spark/mapjoin_subquery.q.out +++ ql/src/test/results/clientpositive/spark/mapjoin_subquery.q.out @@ -19,12 +19,13 @@ FROM JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: 
- Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -119,6 +120,82 @@ STAGE PLANS: Local Work: Map Reduce Local Work + Stage: Stage-2 + Spark + Edges: + Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 2), Map 8 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: z + Filter Operator + predicate: ((11.0 = 11.0) and key is not null) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: y + Filter Operator + predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Map 8 + Map Operator Tree: + TableScan + alias: x + Filter Operator + predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col1, _col4 + Select Operator + expressions: _col4 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col1 + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Stage: Stage-0 Fetch Operator limit: -1 @@ -135,6 +212,9 @@ PREHOOK: Input: default@src PREHOOK: Input: default@src1 PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### POSTHOOK: query: SELECT subq.key1, z.value FROM @@ -146,6 +226,9 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### 128 val_128 128 val_128 @@ -269,12 +352,13 @@ FROM JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked 
pattern was here #### Vertices: @@ -369,6 +453,82 @@ STAGE PLANS: Local Work: Map Reduce Local Work + Stage: Stage-2 + Spark + Edges: + Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 2), Map 8 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: z + Filter Operator + predicate: ((11.0 = 11.0) and key is not null) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: y + Filter Operator + predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Map 8 + Map Operator Tree: + TableScan + alias: x + Filter Operator + predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col1, _col4 + Select Operator + expressions: _col4 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col1 + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Stage: Stage-0 Fetch Operator limit: -1 @@ -385,6 +545,9 @@ PREHOOK: Input: default@src PREHOOK: Input: default@src1 PREHOOK: Input: default@srcpart PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### POSTHOOK: query: SELECT subq.key1, z.value FROM @@ -396,6 +559,9 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### 128 val_128 128 val_128 diff --git ql/src/test/results/clientpositive/spark/mapjoin_subquery2.q.out ql/src/test/results/clientpositive/spark/mapjoin_subquery2.q.out index a757d0b..9027c32 100644 --- ql/src/test/results/clientpositive/spark/mapjoin_subquery2.q.out +++ ql/src/test/results/clientpositive/spark/mapjoin_subquery2.q.out @@ -83,12 +83,13 @@ FROM JOIN z ON (subq.key1 = z.id) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root 
stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -171,6 +172,76 @@ STAGE PLANS: Local Work: Map Reduce Local Work + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 8 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: y + Filter Operator + predicate: id is not null (type: boolean) + Reduce Output Operator + key expressions: id (type: int) + sort order: + + Map-reduce partition columns: id (type: int) + value expressions: name (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: x + Filter Operator + predicate: id is not null (type: boolean) + Reduce Output Operator + key expressions: id (type: int) + sort order: + + Map-reduce partition columns: id (type: int) + value expressions: name (type: string) + Map 8 + Map Operator Tree: + TableScan + alias: z + Filter Operator + predicate: id is not null (type: boolean) + Reduce Output Operator + key expressions: id (type: int) + sort order: + + Map-reduce partition columns: id (type: int) + value expressions: name (type: string) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col5, _col6 + Select Operator + expressions: _col6 (type: int), _col5 (type: string), _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: string) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: int), _col5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/mapjoin_test_outer.q.out ql/src/test/results/clientpositive/spark/mapjoin_test_outer.q.out index 7143348..b0abb74 100644 --- ql/src/test/results/clientpositive/spark/mapjoin_test_outer.q.out +++ ql/src/test/results/clientpositive/spark/mapjoin_test_outer.q.out @@ -1078,12 +1078,13 @@ SELECT * FROM src1 SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -1160,6 +1161,65 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + 
Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 8 (PARTITION-LEVEL SORT, 2), Map 9 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Reducer 6 (SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 5 + Map Operator Tree: + TableScan + alias: src1 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Map 8 + Map Operator Tree: + TableScan + alias: src2 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Map 9 + Map Operator Tree: + TableScan + alias: src3 + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + Inner Join 1 to 2 + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + sort order: ++++++ + Reducer 7 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/multi_join_union.q.out ql/src/test/results/clientpositive/spark/multi_join_union.q.out index bda569d..30d18c1 100644 --- ql/src/test/results/clientpositive/spark/multi_join_union.q.out +++ ql/src/test/results/clientpositive/spark/multi_join_union.q.out @@ -53,12 +53,13 @@ src12 b ON (a.key = b.key) JOIN (SELECT * FROM (SELECT * FROM src13 UNION ALL SELECT * FROM src14)a )c ON c.value = b.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -159,6 +160,90 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 10 <- Map 11 (PARTITION-LEVEL SORT, 2), Map 9 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Map 6 (PARTITION-LEVEL SORT, 2), Map 8 (PARTITION-LEVEL SORT, 2), Reducer 10 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 11 + Map Operator Tree: + TableScan + alias: b + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Reduce Output Operator + key expressions: 
key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Map 6 + Map Operator Tree: + TableScan + alias: src13 + Filter Operator + predicate: value is not null (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + value expressions: _col0 (type: string) + Map 8 + Map Operator Tree: + TableScan + alias: src14 + Filter Operator + predicate: value is not null (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + value expressions: _col0 (type: string) + Map 9 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Reducer 10 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col5, _col6 + Reduce Output Operator + key expressions: _col6 (type: string) + sort order: + + Map-reduce partition columns: _col6 (type: string) + value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/parquet_join.q.out ql/src/test/results/clientpositive/spark/parquet_join.q.out index 390aeb1..94cf0ea 100644 --- ql/src/test/results/clientpositive/spark/parquet_join.q.out +++ ql/src/test/results/clientpositive/spark/parquet_join.q.out @@ -163,12 +163,13 @@ POSTHOOK: query: -- The two tables involved in the join have differing number of explain select p2.myvalue from parquet_jointable1 p1 join parquet_jointable2 p2 on p1.key=p2.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -223,6 +224,49 @@ STAGE PLANS: Local Work: Map Reduce Local Work + Stage: Stage-2 + Spark + Edges: + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: p1 + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition 
columns: key (type: int) + Map 5 + Map Operator Tree: + TableScan + alias: p2 + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + value expressions: myvalue (type: string) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col7 + Select Operator + expressions: _col7 (type: string) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/reduce_deduplicate_exclude_join.q.out ql/src/test/results/clientpositive/spark/reduce_deduplicate_exclude_join.q.out index 19ab4c8..8f750b2 100644 --- ql/src/test/results/clientpositive/spark/reduce_deduplicate_exclude_join.q.out +++ ql/src/test/results/clientpositive/spark/reduce_deduplicate_exclude_join.q.out @@ -3,12 +3,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select * from (select * from src cluster by key) a join src b on a.key = b.key limit 1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -84,6 +85,66 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (SORT, 2) + Reducer 6 <- Map 7 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: src + Filter Operator + predicate: key is not null (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: string) + Map 7 + Map Operator Tree: + TableScan + alias: b + Filter Operator + predicate: key is not null (type: boolean) + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + value expressions: value (type: string) + Reducer 5 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: string) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Limit + Number of rows: 1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: 1 diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_17.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_17.q.out index bd3a6a1..c1072f1 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_17.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_17.q.out @@ -580,12 +580,13 @@ LEFT OUTER JOIN test_table7 s ON a.key = s.key LEFT OUTER JOIN test_table8 t ON a.key = t.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -724,6 +725,218 @@ STAGE PLANS: Local Work: Map Reduce Local Work + Stage: Stage-2 + Spark + Edges: + Reducer 7 <- Map 10 (PARTITION-LEVEL SORT, 1), Map 11 (PARTITION-LEVEL SORT, 1), Map 12 (PARTITION-LEVEL SORT, 1), Map 13 (PARTITION-LEVEL SORT, 1), Map 14 (PARTITION-LEVEL SORT, 1), Map 15 (PARTITION-LEVEL SORT, 1), Map 16 (PARTITION-LEVEL SORT, 1), Map 17 (PARTITION-LEVEL SORT, 1), Map 18 (PARTITION-LEVEL SORT, 1), Map 19 (PARTITION-LEVEL SORT, 1), Map 20 (PARTITION-LEVEL SORT, 1), Map 21 (PARTITION-LEVEL SORT, 1), Map 22 (PARTITION-LEVEL SORT, 1), Map 23 (PARTITION-LEVEL SORT, 1), Map 6 (PARTITION-LEVEL SORT, 1), Map 9 (PARTITION-LEVEL SORT, 1) + Reducer 8 <- Map 24 (PARTITION-LEVEL SORT, 1), Map 25 (PARTITION-LEVEL SORT, 1), Map 26 (PARTITION-LEVEL SORT, 1), Map 27 (PARTITION-LEVEL SORT, 1), Reducer 7 (PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 10 + Map Operator Tree: + TableScan + alias: c + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Map 11 + Map Operator Tree: + TableScan + alias: d + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Map 12 + Map Operator Tree: + TableScan + alias: e + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Map 13 + Map Operator Tree: + TableScan + alias: f + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Map 14 + Map Operator Tree: + TableScan + alias: g + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Map 15 + Map Operator Tree: + TableScan + alias: h + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Map 16 + Map Operator Tree: + TableScan + alias: i + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Map 17 + Map Operator Tree: + TableScan + alias: j + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Map 18 + Map Operator Tree: + TableScan + alias: k + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Map 19 + Map Operator Tree: + TableScan + alias: l + Reduce Output Operator + key expressions: key (type: int) + sort 
order: + + Map-reduce partition columns: key (type: int) + Map 20 + Map Operator Tree: + TableScan + alias: m + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Map 21 + Map Operator Tree: + TableScan + alias: n + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Map 22 + Map Operator Tree: + TableScan + alias: o + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Map 23 + Map Operator Tree: + TableScan + alias: p + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Map 24 + Map Operator Tree: + TableScan + alias: q + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Map 25 + Map Operator Tree: + TableScan + alias: r + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Map 26 + Map Operator Tree: + TableScan + alias: s + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Map 27 + Map Operator Tree: + TableScan + alias: t + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Map 6 + Map Operator Tree: + TableScan + alias: a + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + value expressions: value (type: string) + Map 9 + Map Operator Tree: + TableScan + alias: b + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Left Outer Join0 to 2 + Left Outer Join0 to 3 + Left Outer Join0 to 4 + Left Outer Join0 to 5 + Left Outer Join0 to 6 + Left Outer Join0 to 7 + Left Outer Join0 to 8 + Left Outer Join0 to 9 + Left Outer Join0 to 10 + Left Outer Join0 to 11 + Left Outer Join0 to 12 + Left Outer Join0 to 13 + Left Outer Join0 to 14 + Left Outer Join0 to 15 + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + value expressions: _col1 (type: string) + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Left Outer Join0 to 2 + Left Outer Join0 to 3 + Left Outer Join0 to 4 + outputColumnNames: _col0, _col1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_25.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_25.q.out index cb811ed..ca479fb 100644 --- ql/src/test/results/clientpositive/spark/smb_mapjoin_25.q.out +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_25.q.out @@ -201,12 +201,13 @@ explain select * from (select a.key from smb_bucket_1 a join smb_bucket_2 b on (a.key = b.key) where a.key = 5) t1 left outer join 
(select c.key from smb_bucket_2 c join smb_bucket_3 d on (c.key = d.key) where c.key=5) t2 on (t1.key=t2.key) where t2.key=5 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -286,6 +287,96 @@ STAGE PLANS: Local Work: Map Reduce Local Work + Stage: Stage-2 + Spark + Edges: + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2) + Reducer 8 <- Map 7 (PARTITION-LEVEL SORT, 2), Map 9 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 2), Reducer 8 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: (key = 5) (type: boolean) + Reduce Output Operator + key expressions: 5 (type: int) + sort order: + + Map-reduce partition columns: 5 (type: int) + Map 6 + Map Operator Tree: + TableScan + alias: b + Filter Operator + predicate: (key = 5) (type: boolean) + Reduce Output Operator + key expressions: 5 (type: int) + sort order: + + Map-reduce partition columns: 5 (type: int) + Map 7 + Map Operator Tree: + TableScan + alias: c + Filter Operator + predicate: (key = 5) (type: boolean) + Reduce Output Operator + key expressions: 5 (type: int) + sort order: + + Map-reduce partition columns: 5 (type: int) + Map 9 + Map Operator Tree: + TableScan + alias: d + Filter Operator + predicate: (key = 5) (type: boolean) + Reduce Output Operator + key expressions: 5 (type: int) + sort order: + + Map-reduce partition columns: 5 (type: int) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Select Operator + expressions: 5 (type: int) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + outputColumnNames: _col0, _col1 + Filter Operator + predicate: (_col1 = 5) (type: boolean) + Select Operator + expressions: _col0 (type: int), 5 (type: int) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Select Operator + expressions: 5 (type: int) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.java1.7.out ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.java1.7.out index 92a8595..f87b7b7 100644 --- ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.java1.7.out +++ ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.java1.7.out @@ -465,7 +465,7 @@ POSTHOOK: Input: default@src_5 199 val_199 199 val_199 2 val_2 -Warning: Map Join MAPJOIN[46][bigTable=b] in task 'Stage-2:MAPRED' is a cross product +Warning: Map Join MAPJOIN[78][bigTable=b] in task 'Stage-2:MAPRED' 
is a cross product PREHOOK: query: explain from src b INSERT OVERWRITE TABLE src_4 @@ -495,15 +495,16 @@ INSERT OVERWRITE TABLE src_5 order by key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 + Stage-6 is a root stage has a backup stage: Stage-5 + Stage-2 depends on stages: Stage-6 has a backup stage: Stage-5 Stage-1 depends on stages: Stage-2 Stage-3 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-4 depends on stages: Stage-0 + Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Spark Edges: Reducer 6 <- Map 5 (GROUP, 1) @@ -699,7 +700,141 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator -Warning: Map Join MAPJOIN[46][bigTable=b] in task 'Stage-2:MAPRED' is a cross product + Stage: Stage-5 + Spark + Edges: + Reducer 11 <- Map 12 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 15 <- Map 14 (GROUP, 1) + Reducer 9 <- Map 13 (PARTITION-LEVEL SORT, 2), Reducer 8 (PARTITION-LEVEL SORT, 2) + Reducer 10 <- Reducer 9 (SORT, 1) + Reducer 8 <- Map 7 (PARTITION-LEVEL SORT, 1), Reducer 15 (PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 12 + Map Operator Tree: + TableScan + alias: a + Filter Operator + predicate: ((key > '9') and value is not null) (type: boolean) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Map 13 + Map Operator Tree: + TableScan + alias: s1 + Filter Operator + predicate: (key > '2') (type: boolean) + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Map 14 + Map Operator Tree: + TableScan + alias: s1 + Filter Operator + predicate: ((key > '2') and key is null) (type: boolean) + Select Operator + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Map 7 + Map Operator Tree: + TableScan + alias: b + Reduce Output Operator + sort order: + value expressions: key (type: string), value (type: string) + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), value (type: string) + Reducer 10 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_5 + Reducer 11 + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_4 + Reducer 15 + Reduce Operator Tree: + Group 
By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Filter Operator + predicate: (_col0 = 0) (type: boolean) + Select Operator + expressions: 0 (type: bigint) + outputColumnNames: _col0 + Group By Operator + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: string) + Reducer 9 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + outputColumnNames: _col0, _col1, _col5 + Filter Operator + predicate: _col5 is null (type: boolean) + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + value expressions: _col1 (type: string) + +Warning: Map Join MAPJOIN[78][bigTable=b] in task 'Stage-2:MAPRED' is a cross product PREHOOK: query: from src b INSERT OVERWRITE TABLE src_4 select * @@ -736,7 +871,7 @@ POSTHOOK: Lineage: src_4.key EXPRESSION [(src)b.FieldSchema(name:key, type:strin POSTHOOK: Lineage: src_4.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_5.key EXPRESSION [(src)b.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: src_5.value EXPRESSION [(src)b.FieldSchema(name:value, type:string, comment:default), ] -RUN: Stage-5:MAPRED +RUN: Stage-6:MAPRED RUN: Stage-2:MAPRED RUN: Stage-1:MOVE RUN: Stage-0:MOVE diff --git ql/src/test/results/clientpositive/spark/vector_decimal_mapjoin.q.out ql/src/test/results/clientpositive/spark/vector_decimal_mapjoin.q.out index 5ec95c2..87243f9 100644 --- ql/src/test/results/clientpositive/spark/vector_decimal_mapjoin.q.out +++ ql/src/test/results/clientpositive/spark/vector_decimal_mapjoin.q.out @@ -31,12 +31,13 @@ POSTHOOK: query: EXPLAIN SELECT l.cint, r.cint, l.cdecimal1, r.cdecimal2 WHERE l.cint = 6981 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -92,6 +93,50 @@ STAGE PLANS: Map Reduce Local Work Execution mode: vectorized + Stage: Stage-2 + Spark + Edges: + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: l + Filter Operator + predicate: (cint = 6981) (type: boolean) + Reduce Output Operator + key expressions: 6981 (type: int) + sort order: + + Map-reduce partition columns: 6981 (type: int) + value expressions: cdecimal1 (type: decimal(20,10)) + Map 5 + Map Operator Tree: + TableScan + alias: r + Filter Operator + predicate: (cint = 6981) (type: boolean) + Reduce Output Operator + key expressions: 6981 (type: int) + sort order: + + Map-reduce partition columns: 6981 (type: int) + value expressions: cdecimal2 (type: decimal(23,14)) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col1, _col9 + Select Operator + 
expressions: 6981 (type: int), 6981 (type: int), _col1 (type: decimal(20,10)), _col9 (type: decimal(23,14)) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out index ca8918a..fe33a52 100644 --- ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out +++ ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out @@ -17,12 +17,13 @@ left outer join alltypesorc hd ) t1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -123,6 +124,86 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Stage: Stage-2 + Spark + Edges: + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 9 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Map 10 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 8 <- Reducer 7 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 10 + Map Operator Tree: + TableScan + alias: c + Select Operator + expressions: ctinyint (type: tinyint) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Map 5 + Map Operator Tree: + TableScan + alias: c + Select Operator + expressions: ctinyint (type: tinyint), cint (type: int) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + value expressions: _col0 (type: tinyint) + Map 9 + Map Operator Tree: + TableScan + alias: c + Select Operator + expressions: cint (type: int) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint) + Reducer 8 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git 
ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out index 02c1fc6..a4d1f61 100644 --- ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out +++ ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out @@ -23,12 +23,13 @@ where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR') POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -136,6 +137,93 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Stage: Stage-2 + Spark + Edges: + Reducer 6 <- Map 5 (GROUP, 2) + Reducer 7 <- Map 9 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 8 <- Map 10 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 10 + Map Operator Tree: + TableScan + alias: lineitem + Filter Operator + predicate: ((l_shipmode = 'AIR') and l_orderkey is not null) (type: boolean) + Select Operator + expressions: l_orderkey (type: int) + outputColumnNames: _col0 + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Map 5 + Map Operator Tree: + TableScan + alias: lineitem + Filter Operator + predicate: l_partkey is not null (type: boolean) + Group By Operator + keys: l_partkey (type: int) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Map 9 + Map Operator Tree: + TableScan + alias: li + Filter Operator + predicate: ((l_partkey is not null and l_orderkey is not null) and (l_linenumber = 1)) (type: boolean) + Reduce Output Operator + key expressions: l_partkey (type: int) + sort order: + + Map-reduce partition columns: l_partkey (type: int) + value expressions: l_orderkey (type: int), l_suppkey (type: int) + Reducer 6 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col3 + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + value expressions: _col0 (type: int), _col3 (type: int) + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + outputColumnNames: _col0, _col3 + Select Operator + expressions: _col0 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -181,12 +269,13 @@ where li.l_linenumber = 1 and li.l_orderkey in 
(select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -294,6 +383,93 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Stage: Stage-2 + Spark + Edges: + Reducer 6 <- Map 5 (GROUP, 2) + Reducer 7 <- Map 9 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 8 <- Map 10 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 10 + Map Operator Tree: + TableScan + alias: lineitem + Filter Operator + predicate: ((((l_shipmode = 'AIR') and l_orderkey is not null) and l_linenumber is not null) and (l_linenumber = 1)) (type: boolean) + Select Operator + expressions: l_orderkey (type: int), 1 (type: int) + outputColumnNames: _col0, _col1 + Group By Operator + keys: _col0 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Map 5 + Map Operator Tree: + TableScan + alias: lineitem + Filter Operator + predicate: l_partkey is not null (type: boolean) + Group By Operator + keys: l_partkey (type: int) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Map 9 + Map Operator Tree: + TableScan + alias: li + Filter Operator + predicate: (((l_partkey is not null and l_orderkey is not null) and l_linenumber is not null) and (l_linenumber = 1)) (type: boolean) + Reduce Output Operator + key expressions: l_partkey (type: int) + sort order: + + Map-reduce partition columns: l_partkey (type: int) + value expressions: l_orderkey (type: int), l_suppkey (type: int), 1 (type: int) + Reducer 6 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col0, _col1, _col3, _col4 + Reduce Output Operator + key expressions: _col1 (type: int), _col4 (type: int) + sort order: ++ + Map-reduce partition columns: _col1 (type: int), _col4 (type: int) + value expressions: _col0 (type: int), _col3 (type: int) + Reducer 8 + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + outputColumnNames: _col0, _col3 + Select Operator + expressions: _col0 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out index 237df98..964a330 100644 --- 
ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out @@ -7,12 +7,13 @@ POSTHOOK: query: EXPLAIN SELECT COUNT(t1.cint), MAX(t2.cint), MIN(t1.cint), AVG( JOIN alltypesorc t2 ON t1.cint = t2.cint POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -86,6 +87,62 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: t1 + Filter Operator + predicate: cint is not null (type: boolean) + Reduce Output Operator + key expressions: cint (type: int) + sort order: + + Map-reduce partition columns: cint (type: int) + Map 7 + Map Operator Tree: + TableScan + alias: t2 + Filter Operator + predicate: cint is not null (type: boolean) + Reduce Output Operator + key expressions: cint (type: int) + sort order: + + Map-reduce partition columns: cint (type: int) + Reducer 5 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col2, _col17 + Group By Operator + aggregations: count(_col2), max(_col17), min(_col2), avg((_col2 + _col17)) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + sort order: + value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct<count:bigint,sum:double,input:int>) + Reducer 6 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), avg(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out index f8e8ba7..85e8381 100644 --- ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out @@ -3,12 +3,13 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 is a root stage has a backup stage: Stage-2 + Stage-1 depends on stages: Stage-3 has a backup stage: Stage-2 + Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-2 + Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: @@ -127,6 +128,100 
@@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Stage: Stage-2 + Spark + Edges: + Reducer 6 <- Map 5 (PARTITION-LEVEL SORT, 2), Map 9 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Map 10 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 8 <- Reducer 7 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 10 + Map Operator Tree: + TableScan + alias: v1 + Filter Operator + predicate: csmallint is not null (type: boolean) + Select Operator + expressions: csmallint (type: smallint) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Map 5 + Map Operator Tree: + TableScan + alias: v1 + Filter Operator + predicate: (ctinyint is not null and csmallint is not null) (type: boolean) + Select Operator + expressions: ctinyint (type: tinyint), csmallint (type: smallint), cdouble (type: double) + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + value expressions: _col1 (type: smallint), _col2 (type: double) + Map 9 + Map Operator Tree: + TableScan + alias: v1 + Filter Operator + predicate: ctinyint is not null (type: boolean) + Select Operator + expressions: ctinyint (type: tinyint) + outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Reducer 6 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col1, _col2 + Select Operator + expressions: _col1 (type: smallint), _col2 (type: double) + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + value expressions: _col1 (type: double) + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + outputColumnNames: _col1 + Select Operator + expressions: _col1 (type: double) + outputColumnNames: _col0 + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + sort order: + value expressions: _col0 (type: double) + Reducer 8 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: _col0 (type: double) + outputColumnNames: _col0 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out index 6f11b8c..484f74f 100644 --- ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out +++ ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out @@ -27,12 +27,12 @@ STAGE DEPENDENCIES: Stage-7 depends on stages: Stage-4, Stage-8 , consists of Stage-9, Stage-10, Stage-2 Stage-9 has a backup stage: Stage-2 Stage-5 depends on stages: Stage-9 + Stage-2 Stage-10 has a backup stage: Stage-2 Stage-6 depends on stages: Stage-10 - Stage-2 Stage-11 is a root stage Stage-8 depends on stages: Stage-11 - Stage-0 depends on stages: Stage-5, 
Stage-6, Stage-2 + Stage-0 depends on stages: Stage-5, Stage-2, Stage-6 STAGE PLANS: Stage: Stage-4 @@ -111,43 +111,6 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-10 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_0:$INTNAME - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_0:$INTNAME - TableScan - HashTable Sink Operator - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - - Stage: Stage-6 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - Select Operator - expressions: _col1 (type: int), _col2 (type: int) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local Work: - Map Reduce Local Work - Stage: Stage-2 Map Reduce Map Operator Tree: @@ -185,6 +148,43 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-10 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:$INTNAME + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:$INTNAME + TableScan + HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 + Select Operator + expressions: _col1 (type: int), _col2 (type: int) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + Stage: Stage-11 Map Reduce Local Work Alias -> Map Local Tables: @@ -292,12 +292,12 @@ STAGE DEPENDENCIES: Stage-7 depends on stages: Stage-4, Stage-8 , consists of Stage-9, Stage-10, Stage-2 Stage-9 has a backup stage: Stage-2 Stage-5 depends on stages: Stage-9 + Stage-2 Stage-10 has a backup stage: Stage-2 Stage-6 depends on stages: Stage-10 - Stage-2 Stage-11 is a root stage Stage-8 depends on stages: Stage-11 - Stage-0 depends on stages: Stage-5, Stage-6, Stage-2 + Stage-0 depends on stages: Stage-5, Stage-2, Stage-6 STAGE PLANS: Stage: Stage-4 @@ -376,43 +376,6 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-10 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_0:$INTNAME - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_0:$INTNAME - TableScan - HashTable Sink Operator - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - - Stage: Stage-6 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col2 - Select Operator - expressions: _col1 (type: int), _col2 (type: int) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Local 
Work: - Map Reduce Local Work - Stage: Stage-2 Map Reduce Map Operator Tree: @@ -450,6 +413,43 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-10 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:$INTNAME + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:$INTNAME + TableScan + HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 + Select Operator + expressions: _col1 (type: int), _col2 (type: int) + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + Stage: Stage-11 Map Reduce Local Work Alias -> Map Local Tables: