commit e879f4bd80c2c54a985ee410468ebb9815c9f57b
Author: Janaki Lahorani
Date:   Wed Nov 29 15:36:36 2017 -0800

    HIVE-17396: Support DPP with map join when pruning dependencies and map join dependencies are not aligned.

diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index b7d3e99e1a505f576a06c530080fc72dddcd85ba..a0f82c8e2564a15422886b9a456b35a053492156 100644
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -3581,8 +3581,9 @@ private static void populateLlapDaemonVarsSet(Set<String> llapDaemonVarsSetLocal
         "Maximum total data size in dynamic pruning."),
     SPARK_DYNAMIC_PARTITION_PRUNING_MAP_JOIN_ONLY(
         "hive.spark.dynamic.partition.pruning.map.join.only", false,
-        "Turn on dynamic partition pruning only for map joins.\n" +
-        "If hive.spark.dynamic.partition.pruning is set to true, this parameter value is ignored."),
+        "Turn on dynamic partition pruning only for map joins when a new spark job is not \n" +
+        "created. If hive.spark.dynamic.partition.pruning is set to true, this parameter value \n" +
+        "is ignored."),
     SPARK_USE_GROUPBY_SHUFFLE(
         "hive.spark.use.groupby.shuffle", true,
         "Spark groupByKey transformation has better performance but uses unbounded memory." +
@@ -4931,6 +4932,12 @@ public boolean isWebUiQueryInfoCacheEnabled() {
     return isWebUiEnabled() && this.getIntVar(ConfVars.HIVE_SERVER2_WEBUI_MAX_HISTORIC_QUERIES) > 0;
   }
 
+  /* Dynamic partition pruning is enabled in all cases
+   */
+  public boolean isSparkDPPAll() {
+    return (this.getBoolVar(ConfVars.SPARK_DYNAMIC_PARTITION_PRUNING));
+  }
+
   /* Dynamic partition pruning is enabled in some or all cases
    */
   public boolean isSparkDPPAny() {
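The HiveConf change above adds an "enabled in all cases" predicate next to the existing "enabled in some or all cases" check. As a rough sketch of how the two settings are intended to combine, here is a small standalone model using plain booleans rather than HiveConf; the class and method names are invented for illustration, and the body of isSparkDPPAny() is assumed from its description rather than shown in this diff.

// Illustrative model only; not part of HiveConf.
public class SparkDppModeSketch {

  private final boolean dppForAllCases;   // stands in for hive.spark.dynamic.partition.pruning
  private final boolean dppMapJoinOnly;   // stands in for hive.spark.dynamic.partition.pruning.map.join.only

  public SparkDppModeSketch(boolean dppForAllCases, boolean dppMapJoinOnly) {
    this.dppForAllCases = dppForAllCases;
    this.dppMapJoinOnly = dppMapJoinOnly;
  }

  /** Analogous to the new isSparkDPPAll(): pruning may be applied everywhere. */
  public boolean isDppAll() {
    return dppForAllCases;
  }

  /** Analogous to the existing isSparkDPPAny(): pruning may be applied in at least some cases. */
  public boolean isDppAny() {
    return dppForAllCases || dppMapJoinOnly;
  }

  /** The map-join-only flag is ignored when the full flag is on, per the description above. */
  public boolean isDppMapJoinOnly() {
    return !dppForAllCases && dppMapJoinOnly;
  }

  public static void main(String[] args) {
    SparkDppModeSketch mapJoinOnly = new SparkDppModeSketch(false, true);
    System.out.println(mapJoinOnly.isDppAll());          // false: no extra Spark jobs just for pruning
    System.out.println(mapJoinOnly.isDppMapJoinOnly());  // true: prune only through map-join source works
  }
}

In this model, map-join-only pruning is in effect exactly when the full flag is off, which is why Test 1 in the qfile below turns hive.spark.dynamic.partition.pruning off and the map-join-only flag on, while Test 2 turns the full flag back on.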
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkDynamicPartitionPruningResolver.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkDynamicPartitionPruningResolver.java
index caf5dcc6a7959d0a2041ecbd929ef9364b7c891a..8db949dd1291790f0cf0fef6ca29b1fe05d4e698 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkDynamicPartitionPruningResolver.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SparkDynamicPartitionPruningResolver.java
@@ -19,10 +19,16 @@ package org.apache.hadoop.hive.ql.optimizer.physical;
 
 import java.io.Serializable;
+import java.util.Map;
+import java.util.HashMap;
+import java.util.List;
 import java.util.ArrayList;
 import java.util.Set;
 import java.util.Stack;
 
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.TaskFactory;
+import org.apache.hadoop.hive.ql.plan.SparkWork;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -63,19 +69,93 @@ private static final Logger LOG =
       LoggerFactory.getLogger(SparkDynamicPartitionPruningResolver.class.getName());
 
+  // If parent work and target work are scheduled within the same spark task, the parent work
+  // needs to be moved to a separate spark task that will execute before the task with the target
+  // work. This map will collect all such parent works.
+  private Map<Task<? extends Serializable>, SparkWork> taskListHashMap = new HashMap<>();
+
   @Override
   public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException {
     // Walk through the Task Graph and invoke SparkDynamicPartitionPruningDispatcher
-    TaskGraphWalker graphWalker = new TaskGraphWalker(new SparkDynamicPartitionPruningDispatcher());
+    TaskGraphWalker graphWalker =
+        new TaskGraphWalker(new SparkDynamicPartitionPruningDispatcher(pctx));
+
+    // Check whether a source work is scheduled in a spark task that will execute before the target
+    // work. This will change to false when all dependencies are checked and all tasks are
+    // scheduled to be executed in order.
+    boolean checkDPPDependencies = true;
+    List<Node> rootTasks = new ArrayList<>();
+
+    while (checkDPPDependencies) {
+      rootTasks.addAll(pctx.getRootTasks());
+
+      // During the walk, all parent tasks to be moved up will be saved in taskListHashMap
+      graphWalker.startWalking(rootTasks, null);
+
+      if (!taskListHashMap.isEmpty()) {
+        // taskListHashMap is not empty. Move the spark works out to new parent spark tasks.
+        for (Map.Entry<Task<? extends Serializable>, SparkWork> entry :
+            taskListHashMap.entrySet()) {
+          moveSparkWork(pctx, entry.getKey(), entry.getValue());
+        }
+
+        // Clear the structures used by the graph walker so the DPP dependencies can be checked again.
+        taskListHashMap.clear();
+        rootTasks.clear();
+      } else {
+        // All tasks are scheduled in order. The loop can end.
+        checkDPPDependencies = false;
+      }
+    }
 
-    ArrayList<Node> rootTasks = new ArrayList<>();
-    rootTasks.addAll(pctx.getRootTasks());
-    graphWalker.startWalking(rootTasks, null);
     return pctx;
   }
 
+  // Both the parent and the child of the DPP dependency are part of this task. The parent needs
+  // to be moved to a new spark task that will run before this task.
+  private void moveSparkWork(PhysicalContext pctx, Task<? extends Serializable> task,
+      SparkWork parentSparkWork) {
+    // Get a new parent spark task for the parent spark work
+    SparkTask newSparkTask = (SparkTask) TaskFactory.get(parentSparkWork, pctx.conf);
+
+    // The parents of the spark task from which a sparkWork is moving out will become the
+    // parents of the new spark task.
+    if (task.getNumParent() > 0) {
+      // Walk through the parent list and add a dependency on newSparkTask
+      for (Task<? extends Serializable> parentTask : task.getParentTasks()) {
+        parentTask.addDependentTask(newSparkTask);
+      }
+      // Walk through the parent list and remove the dependency on the original spark task
+      for (Task<? extends Serializable> parentTask : newSparkTask.getParentTasks()) {
+        parentTask.removeDependentTask(task);
+      }
+    }
+
+    // The new spark task will be a parent to the task from which the spark work moved out
+    newSparkTask.addDependentTask(task);
+
+    // If task was a root task, it won't be anymore. Instead the new spark task will be the
+    // root task.
+    if (pctx.getRootTasks().contains(task)) {
+      pctx.addToRootTask(newSparkTask);
+      pctx.removeFromRootTask(task);
+    }
+
+    // The base works that were moved to the new spark task should be removed from the original task
+    for (BaseWork movingBaseWork : newSparkTask.getWork().getAllWork()) {
+      ((SparkTask) task).getWork().remove(movingBaseWork);
+    }
+  }
+
   private class SparkDynamicPartitionPruningDispatcher implements Dispatcher {
 
+    private final PhysicalContext physicalContext;
+
+    // Save the physical context. It is needed to access the configuration.
+    SparkDynamicPartitionPruningDispatcher(PhysicalContext pctx) {
+      physicalContext = pctx;
+    }
+
     @Override
     public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs) throws SemanticException {
       Task<? extends Serializable> task = (Task<? extends Serializable>) nd;
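The moveSparkWork() method above rewires the task graph so that the moved-out source work runs in a new SparkTask that becomes a parent of the task it was pulled from. As a minimal, self-contained sketch of that rewiring, here is a toy model with placeholder classes; TaskNode and moveAboveTask are invented for the example and are not Hive's Task/SparkWork API.

// Toy model of the task-graph rewiring; the classes below are not Hive classes.
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

class TaskNode {
  final String name;
  final Set<TaskNode> parents = new LinkedHashSet<>();
  final Set<TaskNode> children = new LinkedHashSet<>();

  TaskNode(String name) { this.name = name; }

  void addDependent(TaskNode child) {
    children.add(child);
    child.parents.add(this);
  }

  void removeDependent(TaskNode child) {
    children.remove(child);
    child.parents.remove(this);
  }
}

public class MoveWorkSketch {

  // Insert newParent between task and all of task's current parents,
  // mirroring what moveSparkWork() does with addDependentTask/removeDependentTask.
  static void moveAboveTask(TaskNode task, TaskNode newParent, List<TaskNode> rootTasks) {
    for (TaskNode parent : new ArrayList<>(task.parents)) {
      parent.addDependent(newParent);   // old parents now feed the new task...
      parent.removeDependent(task);     // ...instead of the original task
    }
    newParent.addDependent(task);       // the new task runs before the original task
    if (rootTasks.remove(task)) {       // if task was a root task, the new task takes its place
      rootTasks.add(newParent);
    }
  }

  public static void main(String[] args) {
    TaskNode sparkTask = new TaskNode("Stage-1 (target MapWork plus DPP source work)");
    TaskNode dppTask = new TaskNode("new SparkTask holding the moved DPP source work");
    List<TaskNode> rootTasks = new ArrayList<>(Arrays.asList(sparkTask));

    moveAboveTask(sparkTask, dppTask, rootTasks);
    System.out.println(rootTasks.get(0).name);  // the new DPP task is now the root
  }
}

The outcome mirrors the comments in the diff: the old parents now point at the new task, the new task points at the original one, and root-task membership is transferred when needed.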
@@ -92,12 +172,35 @@ public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs) throws
       SparkPartitionPruningSinkOperator pruningSinkOp = (SparkPartitionPruningSinkOperator) op;
       MapWork targetMapWork = pruningSinkOp.getConf().getTargetMapWork();
 
-      // Check if the given SparkTask has a child SparkTask that contains the target MapWork
-      // If it does not, then remove the DPP op
-      if (!taskContainsDependentMapWork(task, targetMapWork)) {
-        LOG.info("Disabling DPP for source work " + baseWork.getName() + " for target work "
-            + targetMapWork.getName() + " as no dependency exists between the source and target work");
-        removeSparkPartitionPruningSink(baseWork, targetMapWork, pruningSinkOp);
+      // If DPP is enabled for all cases, identify cases where both the target and the source are
+      // scheduled within the same spark task. This case can be handled by moving the
+      // source work to a new parent task.
+      //
+      // Else part: if DPP is not enabled for all cases, it is enabled only for map join
+      // cases where no new spark task is created, so this operator should be removed.
+      // The case where the source is in the child task and the target is in the parent task
+      // is also not supported, hence the operator will be removed.
+      if (physicalContext.getConf().isSparkDPPAll() && task.getMapWork()
+          .contains(targetMapWork)) {
+        // Both target and source are scheduled within the same spark work. Save the source
+        // so that it can be moved up to a separate spark task.
+        LOG.info("Moving DPP source " + baseWork.getName() + " to a separate parent spark " +
+            "task because target " + targetMapWork.getName() + " is scheduled to run in " +
+            "the same job as source.");
+        SparkWork movingSparkWork = taskListHashMap.computeIfAbsent(task,
+            v -> new SparkWork(physicalContext.conf.getVar(HiveConf.ConfVars.HIVEQUERYID)));
+
+        movingSparkWork.add(baseWork);
+        break;
+      } else {
+        // Check if the given SparkTask has a child SparkTask that contains the target MapWork
+        // If it does not, then remove the DPP op
+        if (!taskContainsDependentMapWork(task, targetMapWork)) {
+          LOG.info("Disabling DPP for source work " + baseWork.getName() + " for target work "
+              + targetMapWork.getName()
+              + " as no dependency exists between the source and target work");
+          removeSparkPartitionPruningSink(baseWork, targetMapWork, pruningSinkOp);
+        }
       }
     }
   }
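To summarize the branch added to dispatch() above: each Spark partition pruning sink is either moved, kept, or removed depending on where its target MapWork is scheduled relative to the source work. The condensed sketch below is illustrative only; the enum and boolean parameters are invented names, and the diff itself expresses these checks through isSparkDPPAll() and taskContainsDependentMapWork().

// Condensed, illustrative decision model; not Hive code.
public class DppSinkDecisionSketch {

  enum Action { MOVE_SOURCE_TO_NEW_PARENT_TASK, KEEP_SINK, REMOVE_SINK }

  static Action decide(boolean dppEnabledForAllCases,
                       boolean targetInSameSparkTask,
                       boolean targetInChildSparkTask) {
    if (dppEnabledForAllCases && targetInSameSparkTask) {
      // Source and target landed in the same SparkTask: split the source out so the
      // pruning output is produced by a job that runs before the target map work.
      return Action.MOVE_SOURCE_TO_NEW_PARENT_TASK;
    }
    if (targetInChildSparkTask) {
      // The target map work runs in a downstream SparkTask, so the sink can stay.
      return Action.KEEP_SINK;
    }
    // Map-join-only mode needing an extra job, or the target runs before the source:
    // pruning cannot be applied, so the sink is removed.
    return Action.REMOVE_SINK;
  }

  public static void main(String[] args) {
    // Map-join-only mode with a recursive map join (Test 1 below): the sink is removed.
    System.out.println(decide(false, true, false));
    // Full DPP (Test 2 below): the source work is moved to a new parent task.
    System.out.println(decide(true, true, false));
  }
}

This is also what the two qfile tests below exercise: Test 1 (map-join-only mode) ends up on the remove branch for the recursive map joins, while Test 2 (full DPP) takes the move branch and produces the extra upstream Spark stages seen in the updated golden output.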
diff --git ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning_recursive_mapjoin.q ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning_recursive_mapjoin.q
index 5e19b9762c0a8383f73a50ece0e46c65fde7ee31..251faae586a03351475b8a9a5d192b70015ad6ae 100644
--- ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning_recursive_mapjoin.q
+++ ql/src/test/queries/clientpositive/spark_dynamic_partition_pruning_recursive_mapjoin.q
@@ -1,4 +1,3 @@
-SET hive.spark.dynamic.partition.pruning=true;
 SET hive.auto.convert.join=true;
 SET hive.strict.checks.cartesian.product=false;
 
@@ -52,6 +51,102 @@ INSERT INTO TABLE part_table5 PARTITION (part5_col = 5) VALUES (5);
 INSERT INTO table reg_table VALUES (1), (2), (3), (4), (5), (6);
 
+----------------------------Test 1-----------------------------------------------
+-- DPP enabled only for map join. If additional spark jobs are required DPP will not be picked.
+-- So, for recursive map join DPP is not picked
+SET hive.spark.dynamic.partition.pruning=false;
+SET hive.spark.dynamic.partition.pruning.map.join.only=true;
+
+-- 3 table join pt2 pruned based on scan from pt1
+explain SELECT *
+  FROM part_table1 pt1,
+       part_table2 pt2,
+       reg_table rt
+  WHERE rt.col = pt1.part1_col
+  AND pt2.part2_col = pt1.part1_col;
+
+SELECT *
+FROM part_table1 pt1,
+     part_table2 pt2,
+     reg_table rt
+WHERE rt.col = pt1.part1_col
+AND pt2.part2_col = pt1.part1_col;
+
+-- 4 table join pt3 pruned based on pt2, pt2 pruned based on pt1
+explain SELECT *
+  FROM part_table1 pt1,
+       part_table2 pt2,
+       part_table3 pt3,
+       reg_table rt
+  WHERE rt.col = pt1.part1_col
+  AND pt2.part2_col = pt1.part1_col
+  AND pt3.part3_col = pt1.part1_col;
+
+SELECT *
+FROM part_table1 pt1,
+     part_table2 pt2,
+     part_table3 pt3,
+     reg_table rt
+WHERE rt.col = pt1.part1_col
+AND pt2.part2_col = pt1.part1_col
+AND pt3.part3_col = pt1.part1_col;
+
+-- 5 table join pt4 pruned based on pt3, pt3 pruned based on pt2, pt2 pruned based on pt1
+explain SELECT *
+  FROM part_table1 pt1,
+       part_table2 pt2,
+       part_table3 pt3,
+       part_table4 pt4,
+       reg_table rt
+  WHERE rt.col = pt1.part1_col
+  AND pt2.part2_col = pt1.part1_col
+  AND pt3.part3_col = pt1.part1_col
+  AND pt4.part4_col = pt1.part1_col;
+
+SELECT *
+FROM part_table1 pt1,
+     part_table2 pt2,
+     part_table3 pt3,
+     part_table4 pt4,
+     reg_table rt
+WHERE rt.col = pt1.part1_col
+AND pt2.part2_col = pt1.part1_col
+AND pt3.part3_col = pt1.part1_col
+AND pt4.part4_col = pt1.part1_col;
+
+-- 6 table join pt5 pruned based on pt4, pt4 pruned based on pt3, pt3 pruned based on pt2,
+-- pt2 pruned based on pt1
+explain SELECT *
+  FROM part_table1 pt1,
+       part_table2 pt2,
+       part_table3 pt3,
+       part_table4 pt4,
+       part_table5 pt5,
+       reg_table rt
+  WHERE rt.col = pt1.part1_col
+  AND pt2.part2_col = pt1.part1_col
+  AND pt3.part3_col = pt1.part1_col
+  AND pt4.part4_col = pt1.part1_col
+  AND pt5.part5_col = pt1.part1_col;
+
+SELECT *
+FROM part_table1 pt1,
+     part_table2 pt2,
+     part_table3 pt3,
+     part_table4 pt4,
+     part_table5 pt5,
+     reg_table rt
+WHERE rt.col = pt1.part1_col
+AND pt2.part2_col = pt1.part1_col
+AND pt3.part3_col = pt1.part1_col
+AND pt4.part4_col = pt1.part1_col
+AND pt5.part5_col = pt1.part1_col;
+----------------------------Test 1-----------------------------------------------
+
+----------------------------Test 2-----------------------------------------------
+-- DPP enabled for all cases. Because of chained DPP, new spark jobs will be created.
+SET hive.spark.dynamic.partition.pruning=true; + -- 3 table join pt2 pruned based on scan from pt1 explain SELECT * FROM part_table1 pt1, @@ -136,6 +231,7 @@ AND pt2.part2_col = pt1.part1_col AND pt3.part3_col = pt1.part1_col AND pt4.part4_col = pt1.part1_col AND pt5.part5_col = pt1.part1_col; +----------------------------Test 2----------------------------------------------- -- Cleanup DROP TABLE part_table1; diff --git ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_recursive_mapjoin.q.out ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_recursive_mapjoin.q.out index 828c54fb7936a00d29d8f957265b347ce2d88dbe..4dd9bf6839c44d841afe79886d627ac13c4684ae 100644 --- ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_recursive_mapjoin.q.out +++ ql/src/test/results/clientpositive/spark/spark_dynamic_partition_pruning_recursive_mapjoin.q.out @@ -1008,6 +1008,1130 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: SELECT * +FROM part_table1 pt1, + part_table2 pt2, + part_table3 pt3, + part_table4 pt4, + part_table5 pt5, + reg_table rt +WHERE rt.col = pt1.part1_col +AND pt2.part2_col = pt1.part1_col +AND pt3.part3_col = pt1.part1_col +AND pt4.part4_col = pt1.part1_col +AND pt5.part5_col = pt1.part1_col +PREHOOK: type: QUERY +PREHOOK: Input: default@part_table1 +PREHOOK: Input: default@part_table1@part1_col=1 +PREHOOK: Input: default@part_table2 +PREHOOK: Input: default@part_table2@part2_col=1 +PREHOOK: Input: default@part_table2@part2_col=2 +PREHOOK: Input: default@part_table3 +PREHOOK: Input: default@part_table3@part3_col=1 +PREHOOK: Input: default@part_table3@part3_col=2 +PREHOOK: Input: default@part_table3@part3_col=3 +PREHOOK: Input: default@part_table4 +PREHOOK: Input: default@part_table4@part4_col=1 +PREHOOK: Input: default@part_table4@part4_col=2 +PREHOOK: Input: default@part_table4@part4_col=3 +PREHOOK: Input: default@part_table4@part4_col=4 +PREHOOK: Input: default@part_table5 +PREHOOK: Input: default@part_table5@part5_col=1 +PREHOOK: Input: default@part_table5@part5_col=2 +PREHOOK: Input: default@part_table5@part5_col=3 +PREHOOK: Input: default@part_table5@part5_col=4 +PREHOOK: Input: default@part_table5@part5_col=5 +PREHOOK: Input: default@reg_table +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT * +FROM part_table1 pt1, + part_table2 pt2, + part_table3 pt3, + part_table4 pt4, + part_table5 pt5, + reg_table rt +WHERE rt.col = pt1.part1_col +AND pt2.part2_col = pt1.part1_col +AND pt3.part3_col = pt1.part1_col +AND pt4.part4_col = pt1.part1_col +AND pt5.part5_col = pt1.part1_col +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_table1 +POSTHOOK: Input: default@part_table1@part1_col=1 +POSTHOOK: Input: default@part_table2 +POSTHOOK: Input: default@part_table2@part2_col=1 +POSTHOOK: Input: default@part_table2@part2_col=2 +POSTHOOK: Input: default@part_table3 +POSTHOOK: Input: default@part_table3@part3_col=1 +POSTHOOK: Input: default@part_table3@part3_col=2 +POSTHOOK: Input: default@part_table3@part3_col=3 +POSTHOOK: Input: default@part_table4 +POSTHOOK: Input: default@part_table4@part4_col=1 +POSTHOOK: Input: default@part_table4@part4_col=2 +POSTHOOK: Input: default@part_table4@part4_col=3 +POSTHOOK: Input: default@part_table4@part4_col=4 +POSTHOOK: Input: default@part_table5 +POSTHOOK: Input: default@part_table5@part5_col=1 +POSTHOOK: Input: default@part_table5@part5_col=2 +POSTHOOK: Input: default@part_table5@part5_col=3 +POSTHOOK: Input: default@part_table5@part5_col=4 +POSTHOOK: Input: 
default@part_table5@part5_col=5 +POSTHOOK: Input: default@reg_table +POSTHOOK: Output: hdfs://### HDFS PATH ### +1 1 1 1 1 1 1 1 1 1 1 +PREHOOK: query: explain SELECT * + FROM part_table1 pt1, + part_table2 pt2, + reg_table rt + WHERE rt.col = pt1.part1_col + AND pt2.part2_col = pt1.part1_col +PREHOOK: type: QUERY +POSTHOOK: query: explain SELECT * + FROM part_table1 pt1, + part_table2 pt2, + reg_table rt + WHERE rt.col = pt1.part1_col + AND pt2.part2_col = pt1.part1_col +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-3 is a root stage + Stage-2 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: pt1 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col (type: int), part1_col (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + 2 _col0 (type: int) + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: [1:part2_col (int)] + partition key expr: [part2_col] + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + target works: [Map 1] + Local Work: + Map Reduce Local Work + + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: pt2 + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col (type: int), part2_col (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + 2 _col0 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: rt + Statistics: Num rows: 6 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: col is not null (type: boolean) + Statistics: Num rows: 6 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + input vertices: + 0 Map 1 + 1 Map 2 + Statistics: Num rows: 13 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: int), _col3 (type: int), _col0 (type: int), _col1 (type: int), _col4 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 13 Data size: 13 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 13 Data size: 13 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * +FROM part_table1 pt1, + part_table2 pt2, + reg_table rt +WHERE rt.col = pt1.part1_col +AND pt2.part2_col = pt1.part1_col +PREHOOK: type: QUERY +PREHOOK: Input: default@part_table1 +PREHOOK: Input: default@part_table1@part1_col=1 +PREHOOK: Input: default@part_table2 +PREHOOK: Input: default@part_table2@part2_col=1 +PREHOOK: Input: default@part_table2@part2_col=2 +PREHOOK: Input: default@reg_table +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT * +FROM part_table1 pt1, + part_table2 pt2, + reg_table rt +WHERE rt.col = pt1.part1_col +AND pt2.part2_col = pt1.part1_col +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_table1 +POSTHOOK: Input: default@part_table1@part1_col=1 +POSTHOOK: Input: default@part_table2 +POSTHOOK: Input: default@part_table2@part2_col=1 +POSTHOOK: Input: default@part_table2@part2_col=2 +POSTHOOK: Input: default@reg_table +POSTHOOK: Output: hdfs://### HDFS PATH ### +1 1 1 1 1 +PREHOOK: query: explain SELECT * + FROM part_table1 pt1, + part_table2 pt2, + part_table3 pt3, + reg_table rt + WHERE rt.col = pt1.part1_col + AND pt2.part2_col = pt1.part1_col + AND pt3.part3_col = pt1.part1_col +PREHOOK: type: QUERY +POSTHOOK: query: explain SELECT * + FROM part_table1 pt1, + part_table2 pt2, + part_table3 pt3, + reg_table rt + WHERE rt.col = pt1.part1_col + AND pt2.part2_col = pt1.part1_col + AND pt3.part3_col = pt1.part1_col +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: pt1 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col (type: int), part1_col (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + 2 _col1 (type: int) + 3 _col0 (type: int) + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: [1:part2_col (int)] + partition key expr: [part2_col] + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + target works: [Map 1] + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: [3:part3_col (int)] + partition key expr: [part3_col] + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + target works: [Map 3] + Local 
Work: + Map Reduce Local Work + + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: pt2 + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col (type: int), part2_col (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + 2 _col1 (type: int) + 3 _col0 (type: int) + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: [3:part3_col (int)] + partition key expr: [part3_col] + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + target works: [Map 3] + Local Work: + Map Reduce Local Work + + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: pt3 + Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col (type: int), part3_col (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + 2 _col1 (type: int) + 3 _col0 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: rt + Statistics: Num rows: 6 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: col is not null (type: boolean) + Statistics: Num rows: 6 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + Inner Join 1 to 3 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + 2 _col1 (type: int) + 3 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + input vertices: + 0 Map 1 + 1 Map 2 + 2 Map 3 + Statistics: Num rows: 19 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: int), _col3 (type: int), _col0 (type: int), _col1 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 19 Data size: 19 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 19 Data size: 19 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * +FROM part_table1 pt1, + part_table2 pt2, + part_table3 pt3, + reg_table rt +WHERE rt.col = pt1.part1_col +AND pt2.part2_col = pt1.part1_col +AND pt3.part3_col = 
pt1.part1_col +PREHOOK: type: QUERY +PREHOOK: Input: default@part_table1 +PREHOOK: Input: default@part_table1@part1_col=1 +PREHOOK: Input: default@part_table2 +PREHOOK: Input: default@part_table2@part2_col=1 +PREHOOK: Input: default@part_table2@part2_col=2 +PREHOOK: Input: default@part_table3 +PREHOOK: Input: default@part_table3@part3_col=1 +PREHOOK: Input: default@part_table3@part3_col=2 +PREHOOK: Input: default@part_table3@part3_col=3 +PREHOOK: Input: default@reg_table +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT * +FROM part_table1 pt1, + part_table2 pt2, + part_table3 pt3, + reg_table rt +WHERE rt.col = pt1.part1_col +AND pt2.part2_col = pt1.part1_col +AND pt3.part3_col = pt1.part1_col +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_table1 +POSTHOOK: Input: default@part_table1@part1_col=1 +POSTHOOK: Input: default@part_table2 +POSTHOOK: Input: default@part_table2@part2_col=1 +POSTHOOK: Input: default@part_table2@part2_col=2 +POSTHOOK: Input: default@part_table3 +POSTHOOK: Input: default@part_table3@part3_col=1 +POSTHOOK: Input: default@part_table3@part3_col=2 +POSTHOOK: Input: default@part_table3@part3_col=3 +POSTHOOK: Input: default@reg_table +POSTHOOK: Output: hdfs://### HDFS PATH ### +1 1 1 1 1 1 1 +PREHOOK: query: explain SELECT * + FROM part_table1 pt1, + part_table2 pt2, + part_table3 pt3, + part_table4 pt4, + reg_table rt + WHERE rt.col = pt1.part1_col + AND pt2.part2_col = pt1.part1_col + AND pt3.part3_col = pt1.part1_col + AND pt4.part4_col = pt1.part1_col +PREHOOK: type: QUERY +POSTHOOK: query: explain SELECT * + FROM part_table1 pt1, + part_table2 pt2, + part_table3 pt3, + part_table4 pt4, + reg_table rt + WHERE rt.col = pt1.part1_col + AND pt2.part2_col = pt1.part1_col + AND pt3.part3_col = pt1.part1_col + AND pt4.part4_col = pt1.part1_col +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-5 is a root stage + Stage-4 depends on stages: Stage-5 + Stage-3 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-5 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: pt1 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col (type: int), part1_col (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + 2 _col1 (type: int) + 3 _col1 (type: int) + 4 _col0 (type: int) + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: [1:part2_col (int)] + partition key expr: [part2_col] + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + target works: [Map 1] + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: 
[3:part3_col (int)] + partition key expr: [part3_col] + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + target works: [Map 3] + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: [4:part4_col (int)] + partition key expr: [part4_col] + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + target works: [Map 4] + Local Work: + Map Reduce Local Work + + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: pt2 + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col (type: int), part2_col (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + 2 _col1 (type: int) + 3 _col1 (type: int) + 4 _col0 (type: int) + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: [3:part3_col (int)] + partition key expr: [part3_col] + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + target works: [Map 3] + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: [4:part4_col (int)] + partition key expr: [part4_col] + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + target works: [Map 4] + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: pt3 + Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col (type: int), part3_col (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + 2 _col1 (type: int) + 3 _col1 (type: int) + 4 _col0 (type: int) + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: [4:part4_col (int)] + partition key expr: [part4_col] + Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE + target works: [Map 4] + Local Work: + Map Reduce Local Work + + Stage: Stage-2 + Spark +#### A 
masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: pt4 + Statistics: Num rows: 4 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col (type: int), part4_col (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + 2 _col1 (type: int) + 3 _col1 (type: int) + 4 _col0 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 5 + Map Operator Tree: + TableScan + alias: rt + Statistics: Num rows: 6 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: col is not null (type: boolean) + Statistics: Num rows: 6 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + Inner Join 1 to 3 + Inner Join 1 to 4 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + 2 _col1 (type: int) + 3 _col1 (type: int) + 4 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + input vertices: + 0 Map 1 + 1 Map 2 + 2 Map 3 + 3 Map 4 + Statistics: Num rows: 26 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: int), _col3 (type: int), _col0 (type: int), _col1 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 26 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 26 Data size: 26 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * +FROM part_table1 pt1, + part_table2 pt2, + part_table3 pt3, + part_table4 pt4, + reg_table rt +WHERE rt.col = pt1.part1_col +AND pt2.part2_col = pt1.part1_col +AND pt3.part3_col = pt1.part1_col +AND pt4.part4_col = pt1.part1_col +PREHOOK: type: QUERY +PREHOOK: Input: default@part_table1 +PREHOOK: Input: default@part_table1@part1_col=1 +PREHOOK: Input: default@part_table2 +PREHOOK: Input: default@part_table2@part2_col=1 +PREHOOK: Input: default@part_table2@part2_col=2 +PREHOOK: Input: default@part_table3 +PREHOOK: Input: default@part_table3@part3_col=1 +PREHOOK: Input: default@part_table3@part3_col=2 +PREHOOK: Input: default@part_table3@part3_col=3 +PREHOOK: Input: default@part_table4 +PREHOOK: Input: default@part_table4@part4_col=1 +PREHOOK: Input: default@part_table4@part4_col=2 +PREHOOK: Input: default@part_table4@part4_col=3 +PREHOOK: Input: default@part_table4@part4_col=4 +PREHOOK: Input: default@reg_table +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: SELECT * +FROM part_table1 pt1, + part_table2 pt2, + part_table3 pt3, + part_table4 pt4, + reg_table rt +WHERE rt.col = pt1.part1_col +AND pt2.part2_col = pt1.part1_col +AND pt3.part3_col = pt1.part1_col +AND 
pt4.part4_col = pt1.part1_col +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part_table1 +POSTHOOK: Input: default@part_table1@part1_col=1 +POSTHOOK: Input: default@part_table2 +POSTHOOK: Input: default@part_table2@part2_col=1 +POSTHOOK: Input: default@part_table2@part2_col=2 +POSTHOOK: Input: default@part_table3 +POSTHOOK: Input: default@part_table3@part3_col=1 +POSTHOOK: Input: default@part_table3@part3_col=2 +POSTHOOK: Input: default@part_table3@part3_col=3 +POSTHOOK: Input: default@part_table4 +POSTHOOK: Input: default@part_table4@part4_col=1 +POSTHOOK: Input: default@part_table4@part4_col=2 +POSTHOOK: Input: default@part_table4@part4_col=3 +POSTHOOK: Input: default@part_table4@part4_col=4 +POSTHOOK: Input: default@reg_table +POSTHOOK: Output: hdfs://### HDFS PATH ### +1 1 1 1 1 1 1 1 1 +PREHOOK: query: explain SELECT * + FROM part_table1 pt1, + part_table2 pt2, + part_table3 pt3, + part_table4 pt4, + part_table5 pt5, + reg_table rt + WHERE rt.col = pt1.part1_col + AND pt2.part2_col = pt1.part1_col + AND pt3.part3_col = pt1.part1_col + AND pt4.part4_col = pt1.part1_col + AND pt5.part5_col = pt1.part1_col +PREHOOK: type: QUERY +POSTHOOK: query: explain SELECT * + FROM part_table1 pt1, + part_table2 pt2, + part_table3 pt3, + part_table4 pt4, + part_table5 pt5, + reg_table rt + WHERE rt.col = pt1.part1_col + AND pt2.part2_col = pt1.part1_col + AND pt3.part3_col = pt1.part1_col + AND pt4.part4_col = pt1.part1_col + AND pt5.part5_col = pt1.part1_col +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-4 depends on stages: Stage-5 + Stage-3 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-6 + Spark +#### A masked pattern was here #### + Vertices: + Map 2 + Map Operator Tree: + TableScan + alias: pt1 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col (type: int), part1_col (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + 2 _col1 (type: int) + 3 _col1 (type: int) + 4 _col1 (type: int) + 5 _col0 (type: int) + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: [1:part2_col (int)] + partition key expr: [part2_col] + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + target works: [Map 1] + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: [3:part3_col (int)] + partition key expr: [part3_col] + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + target works: [Map 3] + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1 Basic 
stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: [4:part4_col (int)] + partition key expr: [part4_col] + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + target works: [Map 4] + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: [5:part5_col (int)] + partition key expr: [part5_col] + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + target works: [Map 5] + Local Work: + Map Reduce Local Work + + Stage: Stage-5 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: pt2 + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col (type: int), part2_col (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + 2 _col1 (type: int) + 3 _col1 (type: int) + 4 _col1 (type: int) + 5 _col0 (type: int) + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: [3:part3_col (int)] + partition key expr: [part3_col] + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + target works: [Map 3] + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: [4:part4_col (int)] + partition key expr: [part4_col] + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + target works: [Map 4] + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: [5:part5_col (int)] + partition key expr: [part5_col] + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + target works: [Map 5] + Local Work: + Map Reduce Local Work + + Stage: Stage-4 + Spark +#### A masked pattern was here #### + Vertices: + Map 3 + Map Operator Tree: + TableScan + alias: pt3 + Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col (type: int), part3_col (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num 
rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + 2 _col1 (type: int) + 3 _col1 (type: int) + 4 _col1 (type: int) + 5 _col0 (type: int) + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: [4:part4_col (int)] + partition key expr: [part4_col] + Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE + target works: [Map 4] + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: [5:part5_col (int)] + partition key expr: [part5_col] + Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE + target works: [Map 5] + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: pt4 + Statistics: Num rows: 4 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col (type: int), part4_col (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + 2 _col1 (type: int) + 3 _col1 (type: int) + 4 _col1 (type: int) + 5 _col0 (type: int) + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Spark Partition Pruning Sink Operator + Target column: [5:part5_col (int)] + partition key expr: [part5_col] + Statistics: Num rows: 4 Data size: 4 Basic stats: COMPLETE Column stats: NONE + target works: [Map 5] + Local Work: + Map Reduce Local Work + + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 5 + Map Operator Tree: + TableScan + alias: pt5 + Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col (type: int), part5_col (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + 2 _col1 (type: int) + 3 _col1 (type: int) + 4 _col1 (type: int) + 5 _col0 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 6 + Map Operator Tree: + TableScan + alias: rt + Statistics: Num rows: 6 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: col is not null (type: boolean) + Statistics: Num rows: 6 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 6 
Data size: 6 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 1 to 2 + Inner Join 1 to 3 + Inner Join 1 to 4 + Inner Join 1 to 5 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + 2 _col1 (type: int) + 3 _col1 (type: int) + 4 _col1 (type: int) + 5 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + input vertices: + 0 Map 1 + 1 Map 2 + 2 Map 3 + 3 Map 4 + 4 Map 5 + Statistics: Num rows: 33 Data size: 33 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: int), _col3 (type: int), _col0 (type: int), _col1 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: int), _col10 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 33 Data size: 33 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 33 Data size: 33 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT * FROM part_table1 pt1, part_table2 pt2,