diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java index c357329..41e0316 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java @@ -32,18 +32,22 @@ import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.AppMasterEventOperator; +import org.apache.hadoop.hive.ql.exec.CommonJoinOperator; import org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator; import org.apache.hadoop.hive.ql.exec.DummyStoreOperator; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.GroupByOperator; import org.apache.hadoop.hive.ql.exec.JoinOperator; +import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.MuxOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.exec.OperatorUtils; +import org.apache.hadoop.hive.ql.exec.PTFOperator; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; import org.apache.hadoop.hive.ql.exec.TezDummyStoreOperator; +import org.apache.hadoop.hive.ql.exec.UDTFOperator; import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; @@ -61,6 +65,8 @@ import org.apache.hadoop.hive.ql.plan.Statistics; import org.apache.hadoop.util.ReflectionUtils; +import com.google.common.collect.Lists; + /** * ConvertJoinMapJoin is an optimization that replaces a common join * (aka shuffle join) with a map join (aka broadcast or fragment replicate @@ -538,16 +544,20 @@ public int getMapJoinConversionPos(JoinOperator joinOp, OptimizeTezProcContext c HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD); int bigTablePosition = -1; - + // number of costly ops (Join, GB, PTF/Windowing, TF) below the big input + int bigInputNumberCostlyOps = -1; + // stats of the big input Statistics bigInputStat = null; - long totalSize = 0; - int pos = 0; // bigTableFound means we've encountered a table that's bigger than the // max. This table is either the the big table or we cannot convert. boolean bigTableFound = false; - for (Operator parentOp : joinOp.getParentOperators()) { + // total size of the inputs + long totalSize = 0; + + for (int pos = 0; pos < joinOp.getParentOperators().size(); pos++) { + Operator parentOp = joinOp.getParentOperators().get(pos); Statistics currInputStat = parentOp.getStatistics(); if (currInputStat == null) { @@ -555,9 +565,10 @@ public int getMapJoinConversionPos(JoinOperator joinOp, OptimizeTezProcContext c return -1; } + int inputNumberCostlyOps = getNumberOfCostlyOps(parentOp); long inputSize = currInputStat.getDataSize(); - if ((bigInputStat == null) - || ((bigInputStat != null) && (inputSize > bigInputStat.getDataSize()))) { + if (bigInputNumberCostlyOps == -1 || inputNumberCostlyOps > bigInputNumberCostlyOps + || (inputNumberCostlyOps == bigInputNumberCostlyOps && inputSize > bigInputStat.getDataSize())) { if (bigTableFound) { // cannot convert to map join; we've already chosen a big table @@ -589,6 +600,7 @@ public int getMapJoinConversionPos(JoinOperator joinOp, OptimizeTezProcContext c if (bigTableCandidateSet.contains(pos)) { bigTablePosition = pos; + bigInputNumberCostlyOps = inputNumberCostlyOps; bigInputStat = currInputStat; } } else { @@ -598,12 +610,39 @@ public int getMapJoinConversionPos(JoinOperator joinOp, OptimizeTezProcContext c return -1; } } - pos++; } return bigTablePosition; } + /* Count the number of costly ops below the input operator */ + private static int getNumberOfCostlyOps(Operator op) { + if (op.getParentOperators() == null || op.getParentOperators().size() == 0) { + return 0; + } + + int numberOps = 0; + List> ops = Lists.newArrayList(op.getParentOperators()); + while (!ops.isEmpty()) { + List> newOps = Lists.newArrayList(); + for (Operator parentOp : ops){ + if (parentOp instanceof CommonJoinOperator || + parentOp instanceof GroupByOperator || + parentOp instanceof LateralViewJoinOperator || + parentOp instanceof PTFOperator || + parentOp instanceof ReduceSinkOperator || + parentOp instanceof UDTFOperator) { + numberOps++; + } + if (parentOp.getParentOperators() != null && parentOp.getParentOperators().size() != 0) { + newOps.addAll(parentOp.getParentOperators()); + } + } + ops = newOps; + } + return numberOps; + } + /* * Once we have decided on the map join, the tree would transform from * @@ -616,7 +655,6 @@ public int getMapJoinConversionPos(JoinOperator joinOp, OptimizeTezProcContext c * * for tez. */ - public MapJoinOperator convertJoinMapJoin(JoinOperator joinOp, OptimizeTezProcContext context, int bigTablePosition, boolean removeReduceSink) throws SemanticException { // bail on mux operator because currently the mux operator masks the emit keys diff --git ql/src/test/results/clientpositive/tez/auto_sortmerge_join_10.q.out ql/src/test/results/clientpositive/tez/auto_sortmerge_join_10.q.out index 7194be5..0d22ea7 100644 --- ql/src/test/results/clientpositive/tez/auto_sortmerge_join_10.q.out +++ ql/src/test/results/clientpositive/tez/auto_sortmerge_join_10.q.out @@ -245,9 +245,8 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 3 <- Reducer 2 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -271,7 +270,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Map 3 + Map 4 Map Operator Tree: TableScan alias: a @@ -283,25 +282,11 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 0 Reducer 2 - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Group By Operator @@ -314,12 +299,26 @@ STAGE PLANS: expressions: _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Reducer 4 + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + input vertices: + 1 Map 4 + Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git ql/src/test/results/clientpositive/tez/auto_sortmerge_join_12.q.out ql/src/test/results/clientpositive/tez/auto_sortmerge_join_12.q.out index e90af15..3631993 100644 --- ql/src/test/results/clientpositive/tez/auto_sortmerge_join_12.q.out +++ ql/src/test/results/clientpositive/tez/auto_sortmerge_join_12.q.out @@ -138,7 +138,7 @@ POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket3out POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@bucket_medium@ds=2008-04-08 -Warning: Map Join MAPJOIN[33][bigTable=?] in task 'Map 3' is a cross product +Warning: Map Join MAPJOIN[33][bigTable=?] in task 'Map 2' is a cross product PREHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain extended select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key @@ -211,9 +211,8 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Map 3 <- Map 2 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -309,13 +308,41 @@ STAGE PLANS: Position of Big Table: 1 Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: NONE HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col6 (type: string), _col6 (type: string) - sort order: ++ - Map-reduce partition columns: _col6 (type: string), _col6 (type: string) - Statistics: Num rows: 1 Data size: 125 Basic stats: COMPLETE Column stats: NONE - tag: 0 - auto parallelism: true + Map Join Operator + condition map: + Inner Join 0 to 1 + Estimated key counts: Map 4 => 58 + keys: + 0 _col6 (type: string), _col6 (type: string) + 1 key (type: string), key (type: string) + input vertices: + 1 Map 4 + Position of Big Table: 0 + Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Map Join Operator + condition map: + Inner Join 0 to 1 + Estimated key counts: Map 5 => 1 + keys: + 0 + 1 + input vertices: + 1 Map 5 + Position of Big Table: 0 + Statistics: Num rows: 69 Data size: 7032 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -369,7 +396,7 @@ STAGE PLANS: name: default.bucket_medium Truncated Path -> Alias: /bucket_medium/ds=2008-04-08 [b] - Map 3 + Map 4 Map Operator Tree: TableScan alias: c @@ -379,41 +406,13 @@ STAGE PLANS: isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Estimated key counts: Map 2 => 1 - keys: - 0 _col6 (type: string), _col6 (type: string) - 1 key (type: string), key (type: string) - input vertices: - 0 Map 2 - Position of Big Table: 1 - Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Map Join Operator - condition map: - Inner Join 0 to 1 - Estimated key counts: Map 5 => 1 - keys: - 0 - 1 - input vertices: - 1 Map 5 - Position of Big Table: 0 - Statistics: Num rows: 69 Data size: 7032 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: bigint) - auto parallelism: false + Reduce Output Operator + key expressions: key (type: string), key (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string), key (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + tag: 1 + auto parallelism: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -580,7 +579,7 @@ STAGE PLANS: name: default.bucket_medium Truncated Path -> Alias: /bucket_medium/ds=2008-04-08 [d] - Reducer 4 + Reducer 3 Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -616,7 +615,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[33][bigTable=?] in task 'Map 3' is a cross product +Warning: Map Join MAPJOIN[33][bigTable=?] in task 'Map 2' is a cross product PREHOOK: query: select count(*) FROM bucket_small a JOIN bucket_medium b ON a.key = b.key JOIN bucket_big c ON c.key = b.key JOIN bucket_medium d ON c.key = b.key PREHOOK: type: QUERY PREHOOK: Input: default@bucket_big diff --git ql/src/test/results/clientpositive/tez/bucket_map_join_tez1.q.out ql/src/test/results/clientpositive/tez/bucket_map_join_tez1.q.out index af5e6e6..3ccc52f 100644 --- ql/src/test/results/clientpositive/tez/bucket_map_join_tez1.q.out +++ ql/src/test/results/clientpositive/tez/bucket_map_join_tez1.q.out @@ -323,8 +323,7 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 3 <- Reducer 2 (CUSTOM_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -355,28 +354,12 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 0 Reducer 2 - Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Select Operator - expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Reducer 2 Reduce Operator Tree: Group By Operator @@ -389,12 +372,28 @@ STAGE PLANS: expressions: _col1 (type: double), _col0 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: double) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -543,8 +542,7 @@ STAGE PLANS: Tez Edges: Map 1 <- Map 3 (CUSTOM_EDGE) - Map 4 <- Reducer 2 (CUSTOM_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -599,28 +597,12 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 0 Reducer 2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Select Operator - expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Reducer 2 Reduce Operator Tree: Group By Operator @@ -633,12 +615,28 @@ STAGE PLANS: expressions: _col1 (type: double), _col0 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 66 Data size: 700 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 66 Data size: 700 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: double) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -848,8 +846,7 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 3 <- Reducer 2 (CUSTOM_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -880,28 +877,12 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 0 Reducer 2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Select Operator - expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Reducer 2 Reduce Operator Tree: Group By Operator @@ -910,12 +891,28 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -941,8 +938,7 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 3 <- Reducer 2 (CUSTOM_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -967,28 +963,12 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 0 Reducer 2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Select Operator - expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Reducer 2 Reduce Operator Tree: Group By Operator @@ -997,12 +977,28 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 7 Data size: 728 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/tez/bucket_map_join_tez2.q.out ql/src/test/results/clientpositive/tez/bucket_map_join_tez2.q.out index 3f980b6..959d7d0 100644 --- ql/src/test/results/clientpositive/tez/bucket_map_join_tez2.q.out +++ ql/src/test/results/clientpositive/tez/bucket_map_join_tez2.q.out @@ -516,8 +516,7 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 3 <- Reducer 2 (CUSTOM_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -546,24 +545,11 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 0 Reducer 2 - Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - File Output Operator - compressed: false - Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Group By Operator @@ -571,11 +557,24 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 60 Data size: 636 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 60 Data size: 636 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + File Output Operator + compressed: false + Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -595,8 +594,7 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 3 <- Reducer 2 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -625,28 +623,12 @@ STAGE PLANS: Filter Operator predicate: UDFToDouble(key) is not null (type: boolean) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 UDFToDouble(_col0) (type: double) - 1 UDFToDouble(key) (type: double) - outputColumnNames: _col0, _col2 - input vertices: - 0 Reducer 2 - Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Select Operator - expressions: _col0 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) Reducer 2 Reduce Operator Tree: Group By Operator @@ -654,12 +636,28 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 60 Data size: 636 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 60 Data size: 636 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 UDFToDouble(_col0) (type: double) + 1 UDFToDouble(key) (type: double) + outputColumnNames: _col0, _col2 + input vertices: + 1 Map 3 + Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/tez/cross_product_check_2.q.out ql/src/test/results/clientpositive/tez/cross_product_check_2.q.out index 14dd820..b69924a 100644 --- ql/src/test/results/clientpositive/tez/cross_product_check_2.q.out +++ ql/src/test/results/clientpositive/tez/cross_product_check_2.q.out @@ -91,7 +91,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[18][bigTable=a] in task 'Map 3' is a cross product +Warning: Map Join MAPJOIN[18][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: explain select * from B d1 join B d2 on d1.key = d2.key join A PREHOOK: type: QUERY POSTHOOK: query: explain select * from B d1 join B d2 on d1.key = d2.key join A @@ -104,8 +104,7 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) - Map 3 <- Map 1 (BROADCAST_EDGE) + Map 1 <- Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -127,10 +126,28 @@ STAGE PLANS: 1 Map 2 Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE HybridGraceHashJoin: true - Reduce Output Operator - sort order: - Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 + input vertices: + 1 Map 3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Map 2 Map Operator Tree: TableScan @@ -150,28 +167,10 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 - input vertices: - 0 Map 1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Stage: Stage-0 Fetch Operator @@ -179,7 +178,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[24][bigTable=a] in task 'Map 4' is a cross product +Warning: Map Join MAPJOIN[24][bigTable=?] in task 'Reducer 2' is a cross product PREHOOK: query: explain select * from A join (select d1.key from B d1 join B d2 on d1.key = d2.key @@ -199,8 +198,7 @@ STAGE PLANS: Tez Edges: Map 1 <- Map 3 (BROADCAST_EDGE) - Map 4 <- Reducer 2 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -250,28 +248,10 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col5 - input vertices: - 1 Reducer 2 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reducer 2 Reduce Operator Tree: Group By Operator @@ -279,10 +259,28 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col5 + input vertices: + 0 Map 4 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -291,7 +289,7 @@ STAGE PLANS: ListSink Warning: Map Join MAPJOIN[17][bigTable=d1] in task 'Map 1' is a cross product -Warning: Map Join MAPJOIN[18][bigTable=a] in task 'Map 4' is a cross product +Warning: Map Join MAPJOIN[18][bigTable=?] in task 'Reducer 2' is a cross product PREHOOK: query: explain select * from A join (select d1.key from B d1 join B d2 where 1 = 1 group by d1.key) od1 PREHOOK: type: QUERY POSTHOOK: query: explain select * from A join (select d1.key from B d1 join B d2 where 1 = 1 group by d1.key) od1 @@ -305,8 +303,7 @@ STAGE PLANS: Tez Edges: Map 1 <- Map 3 (BROADCAST_EDGE) - Map 4 <- Reducer 2 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -348,28 +345,10 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col5 - input vertices: - 1 Reducer 2 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) Reducer 2 Reduce Operator Tree: Group By Operator @@ -377,10 +356,28 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 5 Data size: 47 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 5 Data size: 47 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col5 + input vertices: + 0 Map 4 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -388,7 +385,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[29][bigTable=?] in task 'Reducer 2' is a cross product +Warning: Map Join MAPJOIN[29][bigTable=?] in task 'Reducer 4' is a cross product PREHOOK: query: explain select * from (select A.key from A group by key) ss join (select d1.key from B d1 join B d2 on d1.key = d2.key where 1 = 1 group by d1.key) od1 @@ -406,8 +403,8 @@ STAGE PLANS: Tez Edges: Map 3 <- Map 5 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (BROADCAST_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 2 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -478,6 +475,17 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reducer 4 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -486,7 +494,7 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1 input vertices: - 1 Reducer 4 + 0 Reducer 2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE HybridGraceHashJoin: true File Output Operator @@ -496,17 +504,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/tez/dynamic_partition_pruning.q.out ql/src/test/results/clientpositive/tez/dynamic_partition_pruning.q.out index e11f3e5..02f0060 100644 --- ql/src/test/results/clientpositive/tez/dynamic_partition_pruning.q.out +++ ql/src/test/results/clientpositive/tez/dynamic_partition_pruning.q.out @@ -4125,7 +4125,7 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 #### A masked pattern was here #### 1000 -Warning: Map Join MAPJOIN[21][bigTable=?] in task 'Map 1' is a cross product +Warning: Map Join MAPJOIN[21][bigTable=?] in task 'Reducer 3' is a cross product PREHOOK: query: -- parent is reduce tasks EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' PREHOOK: type: QUERY @@ -4140,9 +4140,8 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Reducer 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (BROADCAST_EDGE), Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -4153,26 +4152,10 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - input vertices: - 1 Reducer 4 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Map 3 + Reduce Output Operator + sort order: + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Map 2 Map Operator Tree: TableScan alias: srcpart @@ -4192,7 +4175,35 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + input vertices: + 0 Map 1 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -4206,18 +4217,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -4225,7 +4224,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[21][bigTable=?] in task 'Map 1' is a cross product +Warning: Map Join MAPJOIN[21][bigTable=?] in task 'Reducer 3' is a cross product PREHOOK: query: select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' PREHOOK: type: QUERY PREHOOK: Input: default@srcpart @@ -4658,9 +4657,8 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Map 3 <- Map 2 (BROADCAST_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -4696,12 +4694,26 @@ STAGE PLANS: 0 Map 1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE HybridGraceHashJoin: true - Reduce Output Operator - key expressions: '13' (type: string) - sort order: + - Map-reduce partition columns: '13' (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 '13' (type: string) + 1 '13' (type: string) + input vertices: + 1 Map 4 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Map 3 + HybridGraceHashJoin: true + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Map 4 Map Operator Tree: TableScan alias: srcpart_hour @@ -4710,26 +4722,12 @@ STAGE PLANS: Filter Operator predicate: (hr = 13) (type: boolean) Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 '13' (type: string) - 1 '13' (type: string) - input vertices: - 0 Map 2 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - HybridGraceHashJoin: true - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 4 + Reduce Output Operator + key expressions: '13' (type: string) + sort order: + + Map-reduce partition columns: '13' (type: string) + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git ql/src/test/results/clientpositive/tez/explainuser_2.q.out ql/src/test/results/clientpositive/tez/explainuser_2.q.out index 57fcc3c..d515ed0 100644 --- ql/src/test/results/clientpositive/tez/explainuser_2.q.out +++ ql/src/test/results/clientpositive/tez/explainuser_2.q.out @@ -1745,19 +1745,17 @@ Plan optimized by CBO. Vertex dependency in root stage Map 1 <- Union 2 (CONTAINS) Map 12 <- Union 10 (CONTAINS) -Map 14 <- Reducer 11 (BROADCAST_EDGE), Union 7 (CONTAINS) -Map 4 <- Union 2 (CONTAINS) -Map 6 <- Reducer 3 (BROADCAST_EDGE), Union 7 (CONTAINS) +Map 6 <- Union 2 (CONTAINS) Map 9 <- Union 10 (CONTAINS) -Reducer 11 <- Map 13 (BROADCAST_EDGE), Union 10 (SIMPLE_EDGE) -Reducer 3 <- Map 5 (BROADCAST_EDGE), Union 2 (SIMPLE_EDGE) -Reducer 8 <- Union 7 (SIMPLE_EDGE) +Reducer 11 <- Map 13 (BROADCAST_EDGE), Map 14 (BROADCAST_EDGE), Union 10 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 3 <- Map 7 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE), Union 2 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 5 <- Union 4 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 8 + Reducer 5 File Output Operator [FS_59] compressed:false Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE @@ -1766,8 +1764,8 @@ Stage-0 | keys:KEY._col0 (type: string), KEY._col1 (type: string) | outputColumnNames:["_col0","_col1"] | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - |<-Union 7 [SIMPLE_EDGE] - |<-Map 14 [CONTAINS] + |<-Union 4 [SIMPLE_EDGE] + |<-Reducer 11 [CONTAINS] | Reduce Output Operator [RS_56] | key expressions:_col0 (type: string), _col1 (type: string) | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) @@ -1785,90 +1783,89 @@ Stage-0 | | keys:{"Reducer 11":"_col2 (type: string)","Map 14":"_col0 (type: string)"} | | outputColumnNames:["_col1","_col2"] | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - | |<-Reducer 11 [BROADCAST_EDGE] - | | Reduce Output Operator [RS_47] - | | key expressions:_col2 (type: string) - | | Map-reduce partition columns:_col2 (type: string) + | |<-Map 14 [BROADCAST_EDGE] + | | Reduce Output Operator [RS_49] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) | | sort order:+ - | | Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col1 (type: string) - | | Map Join Operator [MAPJOIN_84] - | | | condition map:[{"":"Inner Join 0 to 1"}] - | | | keys:{"Reducer 11":"_col1 (type: string)","Map 13":"_col1 (type: string)"} - | | | outputColumnNames:["_col1","_col2"] - | | | Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE - | | |<-Map 13 [BROADCAST_EDGE] - | | | Reduce Output Operator [RS_44] - | | | key expressions:_col1 (type: string) - | | | Map-reduce partition columns:_col1 (type: string) - | | | sort order:+ - | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | | value expressions:_col0 (type: string) - | | | Select Operator [SEL_38] - | | | outputColumnNames:["_col0","_col1"] - | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | | Filter Operator [FIL_80] - | | | predicate:(value is not null and key is not null) (type: boolean) - | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | | TableScan [TS_37] - | | | alias:x - | | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - | | |<-Select Operator [SEL_36] - | | outputColumnNames:["_col1"] - | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | | Group By Operator [GBY_35] - | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) - | | | outputColumnNames:["_col0","_col1"] - | | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | | |<-Union 10 [SIMPLE_EDGE] - | | |<-Map 12 [CONTAINS] - | | | Reduce Output Operator [RS_34] - | | | key expressions:_col0 (type: string), _col1 (type: string) - | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | | sort order:++ - | | | Statistics:Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE - | | | Group By Operator [GBY_33] - | | | keys:_col0 (type: string), _col1 (type: string) - | | | outputColumnNames:["_col0","_col1"] - | | | Statistics:Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE - | | | Select Operator [SEL_29] - | | | outputColumnNames:["_col0","_col1"] - | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | | Filter Operator [FIL_79] - | | | predicate:value is not null (type: boolean) - | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | | TableScan [TS_28] - | | | alias:y - | | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - | | |<-Map 9 [CONTAINS] - | | Reduce Output Operator [RS_34] - | | key expressions:_col0 (type: string), _col1 (type: string) - | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | sort order:++ - | | Statistics:Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE - | | Group By Operator [GBY_33] - | | keys:_col0 (type: string), _col1 (type: string) - | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE - | | Select Operator [SEL_27] - | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_78] - | | predicate:value is not null (type: boolean) - | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_26] - | | alias:x - | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - | |<-Select Operator [SEL_40] - | outputColumnNames:["_col0"] - | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_81] - | predicate:key is not null (type: boolean) - | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_39] - | alias:y - | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - |<-Map 6 [CONTAINS] + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | Select Operator [SEL_40] + | | outputColumnNames:["_col0"] + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_81] + | | predicate:key is not null (type: boolean) + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_39] + | | alias:y + | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | |<-Map Join Operator [MAPJOIN_84] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"Reducer 11":"_col1 (type: string)","Map 13":"_col1 (type: string)"} + | | outputColumnNames:["_col1","_col2"] + | | Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE + | |<-Map 13 [BROADCAST_EDGE] + | | Reduce Output Operator [RS_44] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string) + | | Select Operator [SEL_38] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_80] + | | predicate:(value is not null and key is not null) (type: boolean) + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_37] + | | alias:x + | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | |<-Select Operator [SEL_36] + | outputColumnNames:["_col1"] + | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + | Group By Operator [GBY_35] + | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + | |<-Union 10 [SIMPLE_EDGE] + | |<-Map 12 [CONTAINS] + | | Reduce Output Operator [RS_34] + | | key expressions:_col0 (type: string), _col1 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | sort order:++ + | | Statistics:Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE + | | Group By Operator [GBY_33] + | | keys:_col0 (type: string), _col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE + | | Select Operator [SEL_29] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_79] + | | predicate:value is not null (type: boolean) + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_28] + | | alias:y + | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | |<-Map 9 [CONTAINS] + | Reduce Output Operator [RS_34] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Statistics:Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE + | Group By Operator [GBY_33] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_27] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_78] + | predicate:value is not null (type: boolean) + | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_26] + | alias:x + | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 3 [CONTAINS] Reduce Output Operator [RS_56] key expressions:_col0 (type: string), _col1 (type: string) Map-reduce partition columns:_col0 (type: string), _col1 (type: string) @@ -1883,92 +1880,91 @@ Stage-0 Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Map Join Operator [MAPJOIN_83] | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"Reducer 3":"_col2 (type: string)","Map 6":"_col0 (type: string)"} + | keys:{"Reducer 3":"_col2 (type: string)","Map 8":"_col0 (type: string)"} | outputColumnNames:["_col1","_col2"] | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - |<-Reducer 3 [BROADCAST_EDGE] - | Reduce Output Operator [RS_21] - | key expressions:_col2 (type: string) - | Map-reduce partition columns:_col2 (type: string) + |<-Map 8 [BROADCAST_EDGE] + | Reduce Output Operator [RS_23] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) | sort order:+ - | Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col1 (type: string) - | Map Join Operator [MAPJOIN_82] - | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"Reducer 3":"_col1 (type: string)","Map 5":"_col1 (type: string)"} - | | outputColumnNames:["_col1","_col2"] - | | Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE - | |<-Map 5 [BROADCAST_EDGE] - | | Reduce Output Operator [RS_18] - | | key expressions:_col1 (type: string) - | | Map-reduce partition columns:_col1 (type: string) - | | sort order:+ - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col0 (type: string) - | | Select Operator [SEL_12] - | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_76] - | | predicate:(value is not null and key is not null) (type: boolean) - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_11] - | | alias:x - | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - | |<-Select Operator [SEL_10] - | outputColumnNames:["_col1"] - | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | Group By Operator [GBY_9] - | | keys:KEY._col0 (type: string), KEY._col1 (type: string) - | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | |<-Union 2 [SIMPLE_EDGE] - | |<-Map 1 [CONTAINS] - | | Reduce Output Operator [RS_8] - | | key expressions:_col0 (type: string), _col1 (type: string) - | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | sort order:++ - | | Statistics:Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE - | | Group By Operator [GBY_7] - | | keys:_col0 (type: string), _col1 (type: string) - | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE - | | Select Operator [SEL_1] - | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_74] - | | predicate:value is not null (type: boolean) - | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_0] - | | alias:x - | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - | |<-Map 4 [CONTAINS] - | Reduce Output Operator [RS_8] - | key expressions:_col0 (type: string), _col1 (type: string) - | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | sort order:++ - | Statistics:Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE - | Group By Operator [GBY_7] - | keys:_col0 (type: string), _col1 (type: string) - | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE - | Select Operator [SEL_3] - | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_75] - | predicate:value is not null (type: boolean) - | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_2] - | alias:y - | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - |<-Select Operator [SEL_14] - outputColumnNames:["_col0"] - Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Filter Operator [FIL_77] - predicate:key is not null (type: boolean) - Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - TableScan [TS_13] - alias:y - Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_14] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_77] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_13] + | alias:y + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + |<-Map Join Operator [MAPJOIN_82] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"Reducer 3":"_col1 (type: string)","Map 7":"_col1 (type: string)"} + | outputColumnNames:["_col1","_col2"] + | Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE + |<-Map 7 [BROADCAST_EDGE] + | Reduce Output Operator [RS_18] + | key expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 (type: string) + | sort order:+ + | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col0 (type: string) + | Select Operator [SEL_12] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_76] + | predicate:(value is not null and key is not null) (type: boolean) + | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_11] + | alias:x + | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + |<-Select Operator [SEL_10] + outputColumnNames:["_col1"] + Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + Group By Operator [GBY_9] + | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + |<-Union 2 [SIMPLE_EDGE] + |<-Map 1 [CONTAINS] + | Reduce Output Operator [RS_8] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Statistics:Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE + | Group By Operator [GBY_7] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_1] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_74] + | predicate:value is not null (type: boolean) + | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_0] + | alias:x + | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + |<-Map 6 [CONTAINS] + Reduce Output Operator [RS_8] + key expressions:_col0 (type: string), _col1 (type: string) + Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + sort order:++ + Statistics:Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE + Group By Operator [GBY_7] + keys:_col0 (type: string), _col1 (type: string) + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE + Select Operator [SEL_3] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Filter Operator [FIL_75] + predicate:value is not null (type: boolean) + Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + TableScan [TS_2] + alias:y + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE PREHOOK: query: explain SELECT x.key, y.value @@ -2003,28 +1999,25 @@ Map 1 <- Union 2 (CONTAINS) Map 11 <- Union 12 (CONTAINS) Map 16 <- Union 12 (CONTAINS) Map 17 <- Union 14 (CONTAINS) -Map 19 <- Reducer 15 (BROADCAST_EDGE), Union 7 (CONTAINS) Map 20 <- Union 21 (CONTAINS) Map 27 <- Union 21 (CONTAINS) Map 28 <- Union 23 (CONTAINS) Map 29 <- Union 25 (CONTAINS) -Map 31 <- Reducer 26 (BROADCAST_EDGE), Union 9 (CONTAINS) -Map 4 <- Union 2 (CONTAINS) -Map 6 <- Reducer 3 (BROADCAST_EDGE), Union 7 (CONTAINS) -Reducer 10 <- Union 9 (SIMPLE_EDGE) +Map 8 <- Union 2 (CONTAINS) Reducer 13 <- Union 12 (SIMPLE_EDGE), Union 14 (CONTAINS) -Reducer 15 <- Map 18 (BROADCAST_EDGE), Union 14 (SIMPLE_EDGE) +Reducer 15 <- Map 18 (BROADCAST_EDGE), Map 19 (BROADCAST_EDGE), Union 14 (SIMPLE_EDGE), Union 4 (CONTAINS) Reducer 22 <- Union 21 (SIMPLE_EDGE), Union 23 (CONTAINS) Reducer 24 <- Union 23 (SIMPLE_EDGE), Union 25 (CONTAINS) -Reducer 26 <- Map 30 (BROADCAST_EDGE), Union 25 (SIMPLE_EDGE) -Reducer 3 <- Map 5 (BROADCAST_EDGE), Union 2 (SIMPLE_EDGE) -Reducer 8 <- Union 7 (SIMPLE_EDGE), Union 9 (CONTAINS) +Reducer 26 <- Map 30 (BROADCAST_EDGE), Map 31 (BROADCAST_EDGE), Union 25 (SIMPLE_EDGE), Union 6 (CONTAINS) +Reducer 3 <- Map 10 (BROADCAST_EDGE), Map 9 (BROADCAST_EDGE), Union 2 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 5 <- Union 4 (SIMPLE_EDGE), Union 6 (CONTAINS) +Reducer 7 <- Union 6 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 10 + Reducer 7 File Output Operator [FS_119] compressed:false Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE @@ -2033,8 +2026,8 @@ Stage-0 | keys:KEY._col0 (type: string), KEY._col1 (type: string) | outputColumnNames:["_col0","_col1"] | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - |<-Union 9 [SIMPLE_EDGE] - |<-Map 31 [CONTAINS] + |<-Union 6 [SIMPLE_EDGE] + |<-Reducer 26 [CONTAINS] | Reduce Output Operator [RS_116] | key expressions:_col0 (type: string), _col1 (type: string) | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) @@ -2052,157 +2045,158 @@ Stage-0 | | keys:{"Reducer 26":"_col2 (type: string)","Map 31":"_col0 (type: string)"} | | outputColumnNames:["_col2","_col5"] | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - | |<-Reducer 26 [BROADCAST_EDGE] - | | Reduce Output Operator [RS_107] - | | key expressions:_col2 (type: string) - | | Map-reduce partition columns:_col2 (type: string) + | |<-Map 31 [BROADCAST_EDGE] + | | Reduce Output Operator [RS_109] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) | | sort order:+ - | | Statistics:Num rows: 242 Data size: 2565 Basic stats: COMPLETE Column stats: NONE - | | Map Join Operator [MAPJOIN_166] - | | | condition map:[{"":"Inner Join 0 to 1"}] - | | | keys:{"Reducer 26":"_col1 (type: string)","Map 30":"_col1 (type: string)"} - | | | outputColumnNames:["_col2"] - | | | Statistics:Num rows: 242 Data size: 2565 Basic stats: COMPLETE Column stats: NONE - | | |<-Map 30 [BROADCAST_EDGE] - | | | Reduce Output Operator [RS_104] - | | | key expressions:_col1 (type: string) - | | | Map-reduce partition columns:_col1 (type: string) - | | | sort order:+ - | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | | value expressions:_col0 (type: string) - | | | Select Operator [SEL_98] - | | | outputColumnNames:["_col0","_col1"] - | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | | Filter Operator [FIL_160] - | | | predicate:(value is not null and key is not null) (type: boolean) - | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | | TableScan [TS_97] - | | | alias:x - | | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - | | |<-Select Operator [SEL_96] - | | outputColumnNames:["_col1"] - | | Statistics:Num rows: 220 Data size: 2332 Basic stats: COMPLETE Column stats: NONE - | | Group By Operator [GBY_95] - | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) - | | | outputColumnNames:["_col0","_col1"] - | | | Statistics:Num rows: 220 Data size: 2332 Basic stats: COMPLETE Column stats: NONE - | | |<-Union 25 [SIMPLE_EDGE] - | | |<-Map 29 [CONTAINS] - | | | Reduce Output Operator [RS_94] - | | | key expressions:_col0 (type: string), _col1 (type: string) - | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | | sort order:++ - | | | Statistics:Num rows: 440 Data size: 4664 Basic stats: COMPLETE Column stats: NONE - | | | Group By Operator [GBY_93] - | | | keys:_col0 (type: string), _col1 (type: string) - | | | outputColumnNames:["_col0","_col1"] - | | | Statistics:Num rows: 440 Data size: 4664 Basic stats: COMPLETE Column stats: NONE - | | | Select Operator [SEL_89] - | | | outputColumnNames:["_col0","_col1"] - | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | | Filter Operator [FIL_159] - | | | predicate:value is not null (type: boolean) - | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | | TableScan [TS_88] - | | | alias:y - | | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - | | |<-Reducer 24 [CONTAINS] - | | Reduce Output Operator [RS_94] - | | key expressions:_col0 (type: string), _col1 (type: string) - | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | sort order:++ - | | Statistics:Num rows: 440 Data size: 4664 Basic stats: COMPLETE Column stats: NONE - | | Group By Operator [GBY_93] - | | keys:_col0 (type: string), _col1 (type: string) - | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 440 Data size: 4664 Basic stats: COMPLETE Column stats: NONE - | | Group By Operator [GBY_86] - | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) - | | | outputColumnNames:["_col0","_col1"] - | | | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE - | | |<-Union 23 [SIMPLE_EDGE] - | | |<-Map 28 [CONTAINS] - | | | Reduce Output Operator [RS_85] - | | | key expressions:_col0 (type: string), _col1 (type: string) - | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | | sort order:++ - | | | Statistics:Num rows: 381 Data size: 4028 Basic stats: COMPLETE Column stats: NONE - | | | Group By Operator [GBY_84] - | | | keys:_col0 (type: string), _col1 (type: string) - | | | outputColumnNames:["_col0","_col1"] - | | | Statistics:Num rows: 381 Data size: 4028 Basic stats: COMPLETE Column stats: NONE - | | | Select Operator [SEL_80] - | | | outputColumnNames:["_col0","_col1"] - | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | | Filter Operator [FIL_158] - | | | predicate:value is not null (type: boolean) - | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | | TableScan [TS_79] - | | | alias:y - | | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - | | |<-Reducer 22 [CONTAINS] - | | Reduce Output Operator [RS_85] - | | key expressions:_col0 (type: string), _col1 (type: string) - | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | sort order:++ - | | Statistics:Num rows: 381 Data size: 4028 Basic stats: COMPLETE Column stats: NONE - | | Group By Operator [GBY_84] - | | keys:_col0 (type: string), _col1 (type: string) - | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 381 Data size: 4028 Basic stats: COMPLETE Column stats: NONE - | | Group By Operator [GBY_77] - | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) - | | | outputColumnNames:["_col0","_col1"] - | | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | | |<-Union 21 [SIMPLE_EDGE] - | | |<-Map 20 [CONTAINS] - | | | Reduce Output Operator [RS_76] - | | | key expressions:_col0 (type: string), _col1 (type: string) - | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | | sort order:++ - | | | Statistics:Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE - | | | Group By Operator [GBY_75] - | | | keys:_col0 (type: string), _col1 (type: string) - | | | outputColumnNames:["_col0","_col1"] - | | | Statistics:Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE - | | | Select Operator [SEL_69] - | | | outputColumnNames:["_col0","_col1"] - | | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - | | | Filter Operator [FIL_156] - | | | predicate:value is not null (type: boolean) - | | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - | | | TableScan [TS_68] - | | | alias:x - | | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - | | |<-Map 27 [CONTAINS] - | | Reduce Output Operator [RS_76] - | | key expressions:_col0 (type: string), _col1 (type: string) - | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | sort order:++ - | | Statistics:Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE - | | Group By Operator [GBY_75] - | | keys:_col0 (type: string), _col1 (type: string) - | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE - | | Select Operator [SEL_71] - | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_157] - | | predicate:value is not null (type: boolean) - | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_70] - | | alias:y - | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - | |<-Select Operator [SEL_100] - | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_161] - | predicate:key is not null (type: boolean) - | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_99] - | alias:y - | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - |<-Reducer 8 [CONTAINS] + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col1 (type: string) + | | Select Operator [SEL_100] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_161] + | | predicate:key is not null (type: boolean) + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_99] + | | alias:y + | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | |<-Map Join Operator [MAPJOIN_166] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"Reducer 26":"_col1 (type: string)","Map 30":"_col1 (type: string)"} + | | outputColumnNames:["_col2"] + | | Statistics:Num rows: 242 Data size: 2565 Basic stats: COMPLETE Column stats: NONE + | |<-Map 30 [BROADCAST_EDGE] + | | Reduce Output Operator [RS_104] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string) + | | Select Operator [SEL_98] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_160] + | | predicate:(value is not null and key is not null) (type: boolean) + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_97] + | | alias:x + | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | |<-Select Operator [SEL_96] + | outputColumnNames:["_col1"] + | Statistics:Num rows: 220 Data size: 2332 Basic stats: COMPLETE Column stats: NONE + | Group By Operator [GBY_95] + | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 220 Data size: 2332 Basic stats: COMPLETE Column stats: NONE + | |<-Union 25 [SIMPLE_EDGE] + | |<-Map 29 [CONTAINS] + | | Reduce Output Operator [RS_94] + | | key expressions:_col0 (type: string), _col1 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | sort order:++ + | | Statistics:Num rows: 440 Data size: 4664 Basic stats: COMPLETE Column stats: NONE + | | Group By Operator [GBY_93] + | | keys:_col0 (type: string), _col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 440 Data size: 4664 Basic stats: COMPLETE Column stats: NONE + | | Select Operator [SEL_89] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_159] + | | predicate:value is not null (type: boolean) + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_88] + | | alias:y + | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | |<-Reducer 24 [CONTAINS] + | Reduce Output Operator [RS_94] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Statistics:Num rows: 440 Data size: 4664 Basic stats: COMPLETE Column stats: NONE + | Group By Operator [GBY_93] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 440 Data size: 4664 Basic stats: COMPLETE Column stats: NONE + | Group By Operator [GBY_86] + | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE + | |<-Union 23 [SIMPLE_EDGE] + | |<-Map 28 [CONTAINS] + | | Reduce Output Operator [RS_85] + | | key expressions:_col0 (type: string), _col1 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | sort order:++ + | | Statistics:Num rows: 381 Data size: 4028 Basic stats: COMPLETE Column stats: NONE + | | Group By Operator [GBY_84] + | | keys:_col0 (type: string), _col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 381 Data size: 4028 Basic stats: COMPLETE Column stats: NONE + | | Select Operator [SEL_80] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_158] + | | predicate:value is not null (type: boolean) + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_79] + | | alias:y + | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | |<-Reducer 22 [CONTAINS] + | Reduce Output Operator [RS_85] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Statistics:Num rows: 381 Data size: 4028 Basic stats: COMPLETE Column stats: NONE + | Group By Operator [GBY_84] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 381 Data size: 4028 Basic stats: COMPLETE Column stats: NONE + | Group By Operator [GBY_77] + | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + | |<-Union 21 [SIMPLE_EDGE] + | |<-Map 20 [CONTAINS] + | | Reduce Output Operator [RS_76] + | | key expressions:_col0 (type: string), _col1 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | sort order:++ + | | Statistics:Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE + | | Group By Operator [GBY_75] + | | keys:_col0 (type: string), _col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE + | | Select Operator [SEL_69] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_156] + | | predicate:value is not null (type: boolean) + | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_68] + | | alias:x + | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | |<-Map 27 [CONTAINS] + | Reduce Output Operator [RS_76] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Statistics:Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE + | Group By Operator [GBY_75] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_71] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_157] + | predicate:value is not null (type: boolean) + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_70] + | alias:y + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 5 [CONTAINS] Reduce Output Operator [RS_116] key expressions:_col0 (type: string), _col1 (type: string) Map-reduce partition columns:_col0 (type: string), _col1 (type: string) @@ -2216,8 +2210,8 @@ Stage-0 | keys:KEY._col0 (type: string), KEY._col1 (type: string) | outputColumnNames:["_col0","_col1"] | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - |<-Union 7 [SIMPLE_EDGE] - |<-Map 19 [CONTAINS] + |<-Union 4 [SIMPLE_EDGE] + |<-Reducer 15 [CONTAINS] | Reduce Output Operator [RS_65] | key expressions:_col0 (type: string), _col1 (type: string) | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) @@ -2235,123 +2229,124 @@ Stage-0 | | keys:{"Reducer 15":"_col2 (type: string)","Map 19":"_col0 (type: string)"} | | outputColumnNames:["_col2","_col5"] | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - | |<-Reducer 15 [BROADCAST_EDGE] - | | Reduce Output Operator [RS_56] - | | key expressions:_col2 (type: string) - | | Map-reduce partition columns:_col2 (type: string) + | |<-Map 19 [BROADCAST_EDGE] + | | Reduce Output Operator [RS_58] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) | | sort order:+ - | | Statistics:Num rows: 209 Data size: 2208 Basic stats: COMPLETE Column stats: NONE - | | Map Join Operator [MAPJOIN_164] - | | | condition map:[{"":"Inner Join 0 to 1"}] - | | | keys:{"Reducer 15":"_col1 (type: string)","Map 18":"_col1 (type: string)"} - | | | outputColumnNames:["_col2"] - | | | Statistics:Num rows: 209 Data size: 2208 Basic stats: COMPLETE Column stats: NONE - | | |<-Map 18 [BROADCAST_EDGE] - | | | Reduce Output Operator [RS_53] - | | | key expressions:_col1 (type: string) - | | | Map-reduce partition columns:_col1 (type: string) - | | | sort order:+ - | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | | value expressions:_col0 (type: string) - | | | Select Operator [SEL_47] - | | | outputColumnNames:["_col0","_col1"] - | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | | Filter Operator [FIL_154] - | | | predicate:(value is not null and key is not null) (type: boolean) - | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | | TableScan [TS_46] - | | | alias:x - | | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - | | |<-Select Operator [SEL_45] - | | outputColumnNames:["_col1"] - | | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE - | | Group By Operator [GBY_44] - | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) - | | | outputColumnNames:["_col0","_col1"] - | | | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE - | | |<-Union 14 [SIMPLE_EDGE] - | | |<-Map 17 [CONTAINS] - | | | Reduce Output Operator [RS_43] - | | | key expressions:_col0 (type: string), _col1 (type: string) - | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | | sort order:++ - | | | Statistics:Num rows: 381 Data size: 4028 Basic stats: COMPLETE Column stats: NONE - | | | Group By Operator [GBY_42] - | | | keys:_col0 (type: string), _col1 (type: string) - | | | outputColumnNames:["_col0","_col1"] - | | | Statistics:Num rows: 381 Data size: 4028 Basic stats: COMPLETE Column stats: NONE - | | | Select Operator [SEL_38] - | | | outputColumnNames:["_col0","_col1"] - | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | | Filter Operator [FIL_153] - | | | predicate:value is not null (type: boolean) - | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | | TableScan [TS_37] - | | | alias:y - | | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - | | |<-Reducer 13 [CONTAINS] - | | Reduce Output Operator [RS_43] - | | key expressions:_col0 (type: string), _col1 (type: string) - | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | sort order:++ - | | Statistics:Num rows: 381 Data size: 4028 Basic stats: COMPLETE Column stats: NONE - | | Group By Operator [GBY_42] - | | keys:_col0 (type: string), _col1 (type: string) - | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 381 Data size: 4028 Basic stats: COMPLETE Column stats: NONE - | | Group By Operator [GBY_35] - | | | keys:KEY._col0 (type: string), KEY._col1 (type: string) - | | | outputColumnNames:["_col0","_col1"] - | | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | | |<-Union 12 [SIMPLE_EDGE] - | | |<-Map 11 [CONTAINS] - | | | Reduce Output Operator [RS_34] - | | | key expressions:_col0 (type: string), _col1 (type: string) - | | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | | sort order:++ - | | | Statistics:Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE - | | | Group By Operator [GBY_33] - | | | keys:_col0 (type: string), _col1 (type: string) - | | | outputColumnNames:["_col0","_col1"] - | | | Statistics:Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE - | | | Select Operator [SEL_27] - | | | outputColumnNames:["_col0","_col1"] - | | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - | | | Filter Operator [FIL_151] - | | | predicate:value is not null (type: boolean) - | | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - | | | TableScan [TS_26] - | | | alias:x - | | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - | | |<-Map 16 [CONTAINS] - | | Reduce Output Operator [RS_34] - | | key expressions:_col0 (type: string), _col1 (type: string) - | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | sort order:++ - | | Statistics:Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE - | | Group By Operator [GBY_33] - | | keys:_col0 (type: string), _col1 (type: string) - | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE - | | Select Operator [SEL_29] - | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_152] - | | predicate:value is not null (type: boolean) - | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_28] - | | alias:y - | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - | |<-Select Operator [SEL_49] - | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_155] - | predicate:key is not null (type: boolean) - | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_48] - | alias:y - | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - |<-Map 6 [CONTAINS] + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col1 (type: string) + | | Select Operator [SEL_49] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_155] + | | predicate:key is not null (type: boolean) + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_48] + | | alias:y + | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | |<-Map Join Operator [MAPJOIN_164] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"Reducer 15":"_col1 (type: string)","Map 18":"_col1 (type: string)"} + | | outputColumnNames:["_col2"] + | | Statistics:Num rows: 209 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + | |<-Map 18 [BROADCAST_EDGE] + | | Reduce Output Operator [RS_53] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string) + | | Select Operator [SEL_47] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_154] + | | predicate:(value is not null and key is not null) (type: boolean) + | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_46] + | | alias:x + | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | |<-Select Operator [SEL_45] + | outputColumnNames:["_col1"] + | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE + | Group By Operator [GBY_44] + | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 190 Data size: 2008 Basic stats: COMPLETE Column stats: NONE + | |<-Union 14 [SIMPLE_EDGE] + | |<-Map 17 [CONTAINS] + | | Reduce Output Operator [RS_43] + | | key expressions:_col0 (type: string), _col1 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | sort order:++ + | | Statistics:Num rows: 381 Data size: 4028 Basic stats: COMPLETE Column stats: NONE + | | Group By Operator [GBY_42] + | | keys:_col0 (type: string), _col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 381 Data size: 4028 Basic stats: COMPLETE Column stats: NONE + | | Select Operator [SEL_38] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_153] + | | predicate:value is not null (type: boolean) + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_37] + | | alias:y + | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | |<-Reducer 13 [CONTAINS] + | Reduce Output Operator [RS_43] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Statistics:Num rows: 381 Data size: 4028 Basic stats: COMPLETE Column stats: NONE + | Group By Operator [GBY_42] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 381 Data size: 4028 Basic stats: COMPLETE Column stats: NONE + | Group By Operator [GBY_35] + | | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + | |<-Union 12 [SIMPLE_EDGE] + | |<-Map 11 [CONTAINS] + | | Reduce Output Operator [RS_34] + | | key expressions:_col0 (type: string), _col1 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | | sort order:++ + | | Statistics:Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE + | | Group By Operator [GBY_33] + | | keys:_col0 (type: string), _col1 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE + | | Select Operator [SEL_27] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_151] + | | predicate:value is not null (type: boolean) + | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_26] + | | alias:x + | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | |<-Map 16 [CONTAINS] + | Reduce Output Operator [RS_34] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Statistics:Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE + | Group By Operator [GBY_33] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_29] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_152] + | predicate:value is not null (type: boolean) + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_28] + | alias:y + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 3 [CONTAINS] Reduce Output Operator [RS_65] key expressions:_col0 (type: string), _col1 (type: string) Map-reduce partition columns:_col0 (type: string), _col1 (type: string) @@ -2366,91 +2361,92 @@ Stage-0 Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Map Join Operator [MAPJOIN_163] | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"Reducer 3":"_col2 (type: string)","Map 6":"_col0 (type: string)"} + | keys:{"Reducer 3":"_col2 (type: string)","Map 10":"_col0 (type: string)"} | outputColumnNames:["_col2","_col5"] | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - |<-Reducer 3 [BROADCAST_EDGE] - | Reduce Output Operator [RS_21] - | key expressions:_col2 (type: string) - | Map-reduce partition columns:_col2 (type: string) + |<-Map 10 [BROADCAST_EDGE] + | Reduce Output Operator [RS_23] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) | sort order:+ - | Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE - | Map Join Operator [MAPJOIN_162] - | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"Reducer 3":"_col1 (type: string)","Map 5":"_col1 (type: string)"} - | | outputColumnNames:["_col2"] - | | Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE - | |<-Map 5 [BROADCAST_EDGE] - | | Reduce Output Operator [RS_18] - | | key expressions:_col1 (type: string) - | | Map-reduce partition columns:_col1 (type: string) - | | sort order:+ - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col0 (type: string) - | | Select Operator [SEL_12] - | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_149] - | | predicate:(value is not null and key is not null) (type: boolean) - | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_11] - | | alias:x - | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - | |<-Select Operator [SEL_10] - | outputColumnNames:["_col1"] - | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | Group By Operator [GBY_9] - | | keys:KEY._col0 (type: string), KEY._col1 (type: string) - | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE - | |<-Union 2 [SIMPLE_EDGE] - | |<-Map 1 [CONTAINS] - | | Reduce Output Operator [RS_8] - | | key expressions:_col0 (type: string), _col1 (type: string) - | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | | sort order:++ - | | Statistics:Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE - | | Group By Operator [GBY_7] - | | keys:_col0 (type: string), _col1 (type: string) - | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE - | | Select Operator [SEL_1] - | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_147] - | | predicate:value is not null (type: boolean) - | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_0] - | | alias:x - | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - | |<-Map 4 [CONTAINS] - | Reduce Output Operator [RS_8] - | key expressions:_col0 (type: string), _col1 (type: string) - | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) - | sort order:++ - | Statistics:Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE - | Group By Operator [GBY_7] - | keys:_col0 (type: string), _col1 (type: string) - | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE - | Select Operator [SEL_3] - | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_148] - | predicate:value is not null (type: boolean) - | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_2] - | alias:y - | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - |<-Select Operator [SEL_14] - outputColumnNames:["_col0","_col1"] - Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Filter Operator [FIL_150] - predicate:key is not null (type: boolean) - Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - TableScan [TS_13] - alias:y - Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col1 (type: string) + | Select Operator [SEL_14] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_150] + | predicate:key is not null (type: boolean) + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_13] + | alias:y + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + |<-Map Join Operator [MAPJOIN_162] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"Reducer 3":"_col1 (type: string)","Map 9":"_col1 (type: string)"} + | outputColumnNames:["_col2"] + | Statistics:Num rows: 144 Data size: 1509 Basic stats: COMPLETE Column stats: NONE + |<-Map 9 [BROADCAST_EDGE] + | Reduce Output Operator [RS_18] + | key expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 (type: string) + | sort order:+ + | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col0 (type: string) + | Select Operator [SEL_12] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_149] + | predicate:(value is not null and key is not null) (type: boolean) + | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_11] + | alias:x + | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + |<-Select Operator [SEL_10] + outputColumnNames:["_col1"] + Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + Group By Operator [GBY_9] + | keys:KEY._col0 (type: string), KEY._col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 131 Data size: 1372 Basic stats: COMPLETE Column stats: NONE + |<-Union 2 [SIMPLE_EDGE] + |<-Map 1 [CONTAINS] + | Reduce Output Operator [RS_8] + | key expressions:_col0 (type: string), _col1 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + | sort order:++ + | Statistics:Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE + | Group By Operator [GBY_7] + | keys:_col0 (type: string), _col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_1] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_147] + | predicate:value is not null (type: boolean) + | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_0] + | alias:x + | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + |<-Map 8 [CONTAINS] + Reduce Output Operator [RS_8] + key expressions:_col0 (type: string), _col1 (type: string) + Map-reduce partition columns:_col0 (type: string), _col1 (type: string) + sort order:++ + Statistics:Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE + Group By Operator [GBY_7] + keys:_col0 (type: string), _col1 (type: string) + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE + Select Operator [SEL_3] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Filter Operator [FIL_148] + predicate:value is not null (type: boolean) + Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + TableScan [TS_2] + alias:y + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE PREHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE @@ -3037,14 +3033,15 @@ Map 1 <- Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) Map 12 <- Union 9 (CONTAINS) Map 13 <- Union 9 (CONTAINS) Map 17 <- Map 16 (BROADCAST_EDGE) -Map 18 <- Map 17 (BROADCAST_EDGE), Union 4 (CONTAINS) -Map 19 <- Map 17 (BROADCAST_EDGE), Union 4 (CONTAINS) -Map 20 <- Map 17 (BROADCAST_EDGE), Union 4 (CONTAINS) -Map 21 <- Map 17 (BROADCAST_EDGE), Union 4 (CONTAINS) +Map 19 <- Union 20 (CONTAINS) +Map 21 <- Union 20 (CONTAINS) +Map 22 <- Union 20 (CONTAINS) +Map 23 <- Union 20 (CONTAINS) Map 5 <- Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) Map 8 <- Union 9 (CONTAINS) Reducer 10 <- Map 14 (SIMPLE_EDGE), Union 9 (SIMPLE_EDGE) Reducer 11 <- Map 15 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE), Union 4 (CONTAINS) +Reducer 18 <- Map 17 (SIMPLE_EDGE), Union 20 (SIMPLE_EDGE), Union 4 (CONTAINS) Reducer 3 <- Map 7 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE), Union 4 (CONTAINS) Stage-0 @@ -3052,155 +3049,6 @@ Stage-0 limit:-1 Stage-1 Union 4 - |<-Map 18 [CONTAINS] - | File Output Operator [FS_75] - | compressed:false - | Statistics:Num rows: 1776 Data size: 18753 Basic stats: COMPLETE Column stats: NONE - | table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"} - | Select Operator [SEL_73] - | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 839 Data size: 8873 Basic stats: COMPLETE Column stats: NONE - | Map Join Operator [MAPJOIN_120] - | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"Map 17":"_col1 (type: string)","Map 18":"_col0 (type: string)"} - | | outputColumnNames:["_col0","_col3"] - | | Statistics:Num rows: 839 Data size: 8873 Basic stats: COMPLETE Column stats: NONE - | |<-Map 17 [BROADCAST_EDGE] - | | Reduce Output Operator [RS_69] - | | key expressions:_col1 (type: string) - | | Map-reduce partition columns:_col1 (type: string) - | | sort order:+ - | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col0 (type: string), _col3 (type: string) - | | Map Join Operator [MAPJOIN_119] - | | | condition map:[{"":"Inner Join 0 to 1"}] - | | | keys:{"Map 16":"_col0 (type: string)","Map 17":"_col0 (type: string)"} - | | | outputColumnNames:["_col0","_col1","_col3"] - | | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - | | |<-Map 16 [BROADCAST_EDGE] - | | | Reduce Output Operator [RS_64] - | | | key expressions:_col0 (type: string) - | | | Map-reduce partition columns:_col0 (type: string) - | | | sort order:+ - | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | | value expressions:_col1 (type: string) - | | | Select Operator [SEL_48] - | | | outputColumnNames:["_col0","_col1"] - | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | | Filter Operator [FIL_109] - | | | predicate:(key is not null and value is not null) (type: boolean) - | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | | TableScan [TS_47] - | | | alias:x - | | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - | | |<-Select Operator [SEL_50] - | | outputColumnNames:["_col0","_col1"] - | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_110] - | | predicate:key is not null (type: boolean) - | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_49] - | | alias:x - | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - | | Reduce Output Operator [RS_125] - | | key expressions:_col1 (type: string) - | | Map-reduce partition columns:_col1 (type: string) - | | sort order:+ - | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col0 (type: string), _col3 (type: string) - | | Please refer to the previous Map Join Operator [MAPJOIN_119] - | | Reduce Output Operator [RS_126] - | | key expressions:_col1 (type: string) - | | Map-reduce partition columns:_col1 (type: string) - | | sort order:+ - | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col0 (type: string), _col3 (type: string) - | | Please refer to the previous Map Join Operator [MAPJOIN_119] - | | Reduce Output Operator [RS_127] - | | key expressions:_col1 (type: string) - | | Map-reduce partition columns:_col1 (type: string) - | | sort order:+ - | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col0 (type: string), _col3 (type: string) - | | Please refer to the previous Map Join Operator [MAPJOIN_119] - | |<-Select Operator [SEL_52] - | outputColumnNames:["_col0"] - | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_111] - | predicate:value is not null (type: boolean) - | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_51] - | alias:x - | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - |<-Map 19 [CONTAINS] - | File Output Operator [FS_75] - | compressed:false - | Statistics:Num rows: 1776 Data size: 18753 Basic stats: COMPLETE Column stats: NONE - | table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"} - | Select Operator [SEL_73] - | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 839 Data size: 8873 Basic stats: COMPLETE Column stats: NONE - | Map Join Operator [MAPJOIN_120] - | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"Map 17":"_col1 (type: string)","Map 19":"_col0 (type: string)"} - | | outputColumnNames:["_col0","_col3"] - | | Statistics:Num rows: 839 Data size: 8873 Basic stats: COMPLETE Column stats: NONE - | |<- Please refer to the previous Map 17 [BROADCAST_EDGE] - | |<-Select Operator [SEL_54] - | outputColumnNames:["_col0"] - | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_112] - | predicate:value is not null (type: boolean) - | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_53] - | alias:y - | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - |<-Map 20 [CONTAINS] - | File Output Operator [FS_75] - | compressed:false - | Statistics:Num rows: 1776 Data size: 18753 Basic stats: COMPLETE Column stats: NONE - | table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"} - | Select Operator [SEL_73] - | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 839 Data size: 8873 Basic stats: COMPLETE Column stats: NONE - | Map Join Operator [MAPJOIN_120] - | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"Map 17":"_col1 (type: string)","Map 20":"_col0 (type: string)"} - | | outputColumnNames:["_col0","_col3"] - | | Statistics:Num rows: 839 Data size: 8873 Basic stats: COMPLETE Column stats: NONE - | |<- Please refer to the previous Map 17 [BROADCAST_EDGE] - | |<-Select Operator [SEL_58] - | outputColumnNames:["_col0"] - | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_113] - | predicate:value is not null (type: boolean) - | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_57] - | alias:y - | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - |<-Map 21 [CONTAINS] - | File Output Operator [FS_75] - | compressed:false - | Statistics:Num rows: 1776 Data size: 18753 Basic stats: COMPLETE Column stats: NONE - | table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"} - | Select Operator [SEL_73] - | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 839 Data size: 8873 Basic stats: COMPLETE Column stats: NONE - | Map Join Operator [MAPJOIN_120] - | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"Map 17":"_col1 (type: string)","Map 21":"_col0 (type: string)"} - | | outputColumnNames:["_col0","_col3"] - | | Statistics:Num rows: 839 Data size: 8873 Basic stats: COMPLETE Column stats: NONE - | |<- Please refer to the previous Map 17 [BROADCAST_EDGE] - | |<-Select Operator [SEL_61] - | outputColumnNames:["_col0"] - | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_114] - | predicate:value is not null (type: boolean) - | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_60] - | alias:y - | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE |<-Reducer 11 [CONTAINS] | File Output Operator [FS_75] | compressed:false @@ -3303,6 +3151,117 @@ Stage-0 | TableScan [TS_21] | alias:x | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 18 [CONTAINS] + | File Output Operator [FS_75] + | compressed:false + | Statistics:Num rows: 1776 Data size: 18753 Basic stats: COMPLETE Column stats: NONE + | table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"} + | Select Operator [SEL_73] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 839 Data size: 8873 Basic stats: COMPLETE Column stats: NONE + | Merge Join Operator [MERGEJOIN_120] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"0":"_col1 (type: string)","1":"_col0 (type: string)"} + | | outputColumnNames:["_col0","_col3"] + | | Statistics:Num rows: 839 Data size: 8873 Basic stats: COMPLETE Column stats: NONE + | |<-Map 17 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_69] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string), _col3 (type: string) + | | Map Join Operator [MAPJOIN_119] + | | | condition map:[{"":"Inner Join 0 to 1"}] + | | | keys:{"Map 16":"_col0 (type: string)","Map 17":"_col0 (type: string)"} + | | | outputColumnNames:["_col0","_col1","_col3"] + | | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + | | |<-Map 16 [BROADCAST_EDGE] + | | | Reduce Output Operator [RS_64] + | | | key expressions:_col0 (type: string) + | | | Map-reduce partition columns:_col0 (type: string) + | | | sort order:+ + | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | | value expressions:_col1 (type: string) + | | | Select Operator [SEL_48] + | | | outputColumnNames:["_col0","_col1"] + | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | | Filter Operator [FIL_109] + | | | predicate:(key is not null and value is not null) (type: boolean) + | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | | TableScan [TS_47] + | | | alias:x + | | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | | |<-Select Operator [SEL_50] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_110] + | | predicate:key is not null (type: boolean) + | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_49] + | | alias:x + | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | |<-Union 20 [SIMPLE_EDGE] + | |<-Map 19 [CONTAINS] + | | Reduce Output Operator [RS_71] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Statistics:Num rows: 763 Data size: 8067 Basic stats: COMPLETE Column stats: NONE + | | Select Operator [SEL_52] + | | outputColumnNames:["_col0"] + | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_111] + | | predicate:value is not null (type: boolean) + | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_51] + | | alias:x + | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | |<-Map 21 [CONTAINS] + | | Reduce Output Operator [RS_71] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Statistics:Num rows: 763 Data size: 8067 Basic stats: COMPLETE Column stats: NONE + | | Select Operator [SEL_54] + | | outputColumnNames:["_col0"] + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_112] + | | predicate:value is not null (type: boolean) + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_53] + | | alias:y + | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | |<-Map 22 [CONTAINS] + | | Reduce Output Operator [RS_71] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Statistics:Num rows: 763 Data size: 8067 Basic stats: COMPLETE Column stats: NONE + | | Select Operator [SEL_58] + | | outputColumnNames:["_col0"] + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_113] + | | predicate:value is not null (type: boolean) + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_57] + | | alias:y + | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | |<-Map 23 [CONTAINS] + | Reduce Output Operator [RS_71] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 763 Data size: 8067 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_61] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_114] + | predicate:value is not null (type: boolean) + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_60] + | alias:y + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE |<-Reducer 3 [CONTAINS] File Output Operator [FS_75] compressed:false @@ -3970,16 +3929,17 @@ Vertex dependency in root stage Map 1 <- Union 2 (CONTAINS) Map 11 <- Union 9 (CONTAINS) Map 12 <- Union 9 (CONTAINS) -Map 16 <- Map 21 (BROADCAST_EDGE), Union 4 (CONTAINS) -Map 17 <- Map 21 (BROADCAST_EDGE), Union 4 (CONTAINS) -Map 18 <- Map 21 (BROADCAST_EDGE), Union 4 (CONTAINS) -Map 19 <- Map 21 (BROADCAST_EDGE), Union 4 (CONTAINS) -Map 21 <- Map 20 (BROADCAST_EDGE) +Map 16 <- Union 17 (CONTAINS) +Map 19 <- Union 17 (CONTAINS) +Map 20 <- Union 17 (CONTAINS) +Map 21 <- Union 17 (CONTAINS) +Map 23 <- Map 22 (BROADCAST_EDGE) Map 5 <- Union 2 (CONTAINS) Map 7 <- Map 6 (BROADCAST_EDGE) Map 8 <- Union 9 (CONTAINS) Reducer 10 <- Reducer 14 (SIMPLE_EDGE), Union 4 (CONTAINS), Union 9 (SIMPLE_EDGE) Reducer 14 <- Map 13 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) +Reducer 18 <- Map 23 (SIMPLE_EDGE), Union 17 (SIMPLE_EDGE), Union 4 (CONTAINS) Reducer 3 <- Map 7 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE), Union 4 (CONTAINS) Stage-5 @@ -3991,189 +3951,6 @@ Stage-5 Dependency Collection{} Stage-3 Union 4 - |<-Map 16 [CONTAINS] - | File Output Operator [FS_62] - | compressed:false - | Statistics:Num rows: 1705 Data size: 18038 Basic stats: COMPLETE Column stats: NONE - | table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.a"} - | Select Operator [SEL_60] - | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 839 Data size: 8873 Basic stats: COMPLETE Column stats: NONE - | Map Join Operator [MAPJOIN_108] - | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"Map 21":"_col1 (type: string)","Map 16":"_col1 (type: string)"} - | | outputColumnNames:["_col0","_col6"] - | | Statistics:Num rows: 839 Data size: 8873 Basic stats: COMPLETE Column stats: NONE - | |<-Map 21 [BROADCAST_EDGE] - | | Reduce Output Operator [RS_56] - | | key expressions:_col1 (type: string) - | | Map-reduce partition columns:_col1 (type: string) - | | sort order:+ - | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col0 (type: string), _col6 (type: string) - | | Map Join Operator [MAPJOIN_105] - | | | condition map:[{"":"Inner Join 0 to 1"}] - | | | keys:{"Map 20":"key (type: string)","Map 21":"key (type: string)"} - | | | outputColumnNames:["_col0","_col1","_col6"] - | | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - | | |<-Map 20 [BROADCAST_EDGE] - | | | Reduce Output Operator [RS_51] - | | | key expressions:key (type: string) - | | | Map-reduce partition columns:key (type: string) - | | | sort order:+ - | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | | value expressions:value (type: string) - | | | Filter Operator [FIL_101] - | | | predicate:(key is not null and value is not null) (type: boolean) - | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE - | | | TableScan [TS_48] - | | | alias:x - | | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - | | |<-Filter Operator [FIL_102] - | | predicate:key is not null (type: boolean) - | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_49] - | | alias:y - | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - | | Reduce Output Operator [RS_111] - | | key expressions:_col1 (type: string) - | | Map-reduce partition columns:_col1 (type: string) - | | sort order:+ - | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col0 (type: string), _col6 (type: string) - | | Please refer to the previous Map Join Operator [MAPJOIN_105] - | | Reduce Output Operator [RS_112] - | | key expressions:_col1 (type: string) - | | Map-reduce partition columns:_col1 (type: string) - | | sort order:+ - | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col0 (type: string), _col6 (type: string) - | | Please refer to the previous Map Join Operator [MAPJOIN_105] - | | Reduce Output Operator [RS_113] - | | key expressions:_col1 (type: string) - | | Map-reduce partition columns:_col1 (type: string) - | | sort order:+ - | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col0 (type: string), _col6 (type: string) - | | Please refer to the previous Map Join Operator [MAPJOIN_105] - | |<-Select Operator [SEL_40] - | outputColumnNames:["_col1"] - | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_97] - | predicate:value is not null (type: boolean) - | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_39] - | alias:src1 - | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - | File Output Operator [FS_64] - | compressed:false - | Statistics:Num rows: 1705 Data size: 18038 Basic stats: COMPLETE Column stats: NONE - | table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.b"} - | Please refer to the previous Select Operator [SEL_60] - | File Output Operator [FS_66] - | compressed:false - | Statistics:Num rows: 1705 Data size: 18038 Basic stats: COMPLETE Column stats: NONE - | table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.c"} - | Please refer to the previous Select Operator [SEL_60] - |<-Map 17 [CONTAINS] - | File Output Operator [FS_62] - | compressed:false - | Statistics:Num rows: 1705 Data size: 18038 Basic stats: COMPLETE Column stats: NONE - | table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.a"} - | Select Operator [SEL_60] - | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 839 Data size: 8873 Basic stats: COMPLETE Column stats: NONE - | Map Join Operator [MAPJOIN_108] - | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"Map 21":"_col1 (type: string)","Map 17":"_col1 (type: string)"} - | | outputColumnNames:["_col0","_col6"] - | | Statistics:Num rows: 839 Data size: 8873 Basic stats: COMPLETE Column stats: NONE - | |<- Please refer to the previous Map 21 [BROADCAST_EDGE] - | |<-Select Operator [SEL_42] - | outputColumnNames:["_col1"] - | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_98] - | predicate:value is not null (type: boolean) - | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_41] - | alias:src - | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - | File Output Operator [FS_64] - | compressed:false - | Statistics:Num rows: 1705 Data size: 18038 Basic stats: COMPLETE Column stats: NONE - | table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.b"} - | Please refer to the previous Select Operator [SEL_60] - | File Output Operator [FS_66] - | compressed:false - | Statistics:Num rows: 1705 Data size: 18038 Basic stats: COMPLETE Column stats: NONE - | table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.c"} - | Please refer to the previous Select Operator [SEL_60] - |<-Map 18 [CONTAINS] - | File Output Operator [FS_62] - | compressed:false - | Statistics:Num rows: 1705 Data size: 18038 Basic stats: COMPLETE Column stats: NONE - | table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.a"} - | Select Operator [SEL_60] - | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 839 Data size: 8873 Basic stats: COMPLETE Column stats: NONE - | Map Join Operator [MAPJOIN_108] - | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"Map 21":"_col1 (type: string)","Map 18":"_col1 (type: string)"} - | | outputColumnNames:["_col0","_col6"] - | | Statistics:Num rows: 839 Data size: 8873 Basic stats: COMPLETE Column stats: NONE - | |<- Please refer to the previous Map 21 [BROADCAST_EDGE] - | |<-Select Operator [SEL_45] - | outputColumnNames:["_col1"] - | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_99] - | predicate:value is not null (type: boolean) - | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_44] - | alias:src - | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - | File Output Operator [FS_64] - | compressed:false - | Statistics:Num rows: 1705 Data size: 18038 Basic stats: COMPLETE Column stats: NONE - | table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.b"} - | Please refer to the previous Select Operator [SEL_60] - | File Output Operator [FS_66] - | compressed:false - | Statistics:Num rows: 1705 Data size: 18038 Basic stats: COMPLETE Column stats: NONE - | table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.c"} - | Please refer to the previous Select Operator [SEL_60] - |<-Map 19 [CONTAINS] - | File Output Operator [FS_62] - | compressed:false - | Statistics:Num rows: 1705 Data size: 18038 Basic stats: COMPLETE Column stats: NONE - | table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.a"} - | Select Operator [SEL_60] - | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 839 Data size: 8873 Basic stats: COMPLETE Column stats: NONE - | Map Join Operator [MAPJOIN_108] - | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"Map 21":"_col1 (type: string)","Map 19":"_col1 (type: string)"} - | | outputColumnNames:["_col0","_col6"] - | | Statistics:Num rows: 839 Data size: 8873 Basic stats: COMPLETE Column stats: NONE - | |<- Please refer to the previous Map 21 [BROADCAST_EDGE] - | |<-Select Operator [SEL_47] - | outputColumnNames:["_col1"] - | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_100] - | predicate:value is not null (type: boolean) - | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_46] - | alias:src - | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - | File Output Operator [FS_64] - | compressed:false - | Statistics:Num rows: 1705 Data size: 18038 Basic stats: COMPLETE Column stats: NONE - | table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.b"} - | Please refer to the previous Select Operator [SEL_60] - | File Output Operator [FS_66] - | compressed:false - | Statistics:Num rows: 1705 Data size: 18038 Basic stats: COMPLETE Column stats: NONE - | table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.c"} - | Please refer to the previous Select Operator [SEL_60] |<-Reducer 10 [CONTAINS] | File Output Operator [FS_62] | compressed:false @@ -4281,6 +4058,121 @@ Stage-5 | Statistics:Num rows: 1705 Data size: 18038 Basic stats: COMPLETE Column stats: NONE | table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.c"} | Please refer to the previous Select Operator [SEL_37] + |<-Reducer 18 [CONTAINS] + | File Output Operator [FS_62] + | compressed:false + | Statistics:Num rows: 1705 Data size: 18038 Basic stats: COMPLETE Column stats: NONE + | table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.a"} + | Select Operator [SEL_60] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 839 Data size: 8873 Basic stats: COMPLETE Column stats: NONE + | Merge Join Operator [MERGEJOIN_108] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"0":"_col1 (type: string)","1":"_col1 (type: string)"} + | | outputColumnNames:["_col0","_col6"] + | | Statistics:Num rows: 839 Data size: 8873 Basic stats: COMPLETE Column stats: NONE + | |<-Map 23 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_56] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string), _col6 (type: string) + | | Map Join Operator [MAPJOIN_105] + | | | condition map:[{"":"Inner Join 0 to 1"}] + | | | keys:{"Map 22":"key (type: string)","Map 23":"key (type: string)"} + | | | outputColumnNames:["_col0","_col1","_col6"] + | | | Statistics:Num rows: 14 Data size: 108 Basic stats: COMPLETE Column stats: NONE + | | |<-Map 22 [BROADCAST_EDGE] + | | | Reduce Output Operator [RS_51] + | | | key expressions:key (type: string) + | | | Map-reduce partition columns:key (type: string) + | | | sort order:+ + | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | | value expressions:value (type: string) + | | | Filter Operator [FIL_101] + | | | predicate:(key is not null and value is not null) (type: boolean) + | | | Statistics:Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE + | | | TableScan [TS_48] + | | | alias:x + | | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | | |<-Filter Operator [FIL_102] + | | predicate:key is not null (type: boolean) + | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_49] + | | alias:y + | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | |<-Union 17 [SIMPLE_EDGE] + | |<-Map 16 [CONTAINS] + | | Reduce Output Operator [RS_58] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Statistics:Num rows: 763 Data size: 8067 Basic stats: COMPLETE Column stats: NONE + | | Select Operator [SEL_40] + | | outputColumnNames:["_col1"] + | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_97] + | | predicate:value is not null (type: boolean) + | | Statistics:Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_39] + | | alias:src1 + | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | |<-Map 19 [CONTAINS] + | | Reduce Output Operator [RS_58] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Statistics:Num rows: 763 Data size: 8067 Basic stats: COMPLETE Column stats: NONE + | | Select Operator [SEL_42] + | | outputColumnNames:["_col1"] + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_98] + | | predicate:value is not null (type: boolean) + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_41] + | | alias:src + | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | |<-Map 20 [CONTAINS] + | | Reduce Output Operator [RS_58] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Statistics:Num rows: 763 Data size: 8067 Basic stats: COMPLETE Column stats: NONE + | | Select Operator [SEL_45] + | | outputColumnNames:["_col1"] + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_99] + | | predicate:value is not null (type: boolean) + | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_44] + | | alias:src + | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | |<-Map 21 [CONTAINS] + | Reduce Output Operator [RS_58] + | key expressions:_col1 (type: string) + | Map-reduce partition columns:_col1 (type: string) + | sort order:+ + | Statistics:Num rows: 763 Data size: 8067 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_47] + | outputColumnNames:["_col1"] + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_100] + | predicate:value is not null (type: boolean) + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_46] + | alias:src + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | File Output Operator [FS_64] + | compressed:false + | Statistics:Num rows: 1705 Data size: 18038 Basic stats: COMPLETE Column stats: NONE + | table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.b"} + | Please refer to the previous Select Operator [SEL_60] + | File Output Operator [FS_66] + | compressed:false + | Statistics:Num rows: 1705 Data size: 18038 Basic stats: COMPLETE Column stats: NONE + | table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe","name:":"default.c"} + | Please refer to the previous Select Operator [SEL_60] |<-Reducer 3 [CONTAINS] File Output Operator [FS_62] compressed:false @@ -4372,15 +4264,6 @@ Stage-8 Stage-11 Stats-Aggr Operator Please refer to the previous Stage-0 -Stage-14 - Stats-Aggr Operator - Please refer to the previous Stage-0 -Stage-17 - Stats-Aggr Operator - Please refer to the previous Stage-0 -Stage-20 - Stats-Aggr Operator - Please refer to the previous Stage-0 Stage-6 Stats-Aggr Operator Stage-1 @@ -4393,15 +4276,6 @@ Stage-9 Stage-12 Stats-Aggr Operator Please refer to the previous Stage-1 -Stage-15 - Stats-Aggr Operator - Please refer to the previous Stage-1 -Stage-18 - Stats-Aggr Operator - Please refer to the previous Stage-1 -Stage-21 - Stats-Aggr Operator - Please refer to the previous Stage-1 Stage-7 Stats-Aggr Operator Stage-2 @@ -4414,15 +4288,6 @@ Stage-10 Stage-13 Stats-Aggr Operator Please refer to the previous Stage-2 -Stage-16 - Stats-Aggr Operator - Please refer to the previous Stage-2 -Stage-19 - Stats-Aggr Operator - Please refer to the previous Stage-2 -Stage-22 - Stats-Aggr Operator - Please refer to the previous Stage-2 PREHOOK: query: explain FROM diff --git ql/src/test/results/clientpositive/tez/mrr.q.out ql/src/test/results/clientpositive/tez/mrr.q.out index faace21..65bf54c 100644 --- ql/src/test/results/clientpositive/tez/mrr.q.out +++ ql/src/test/results/clientpositive/tez/mrr.q.out @@ -1667,9 +1667,8 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 3 <- Reducer 2 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1692,7 +1691,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Map 3 + Map 4 Map Operator Tree: TableScan alias: src @@ -1704,22 +1703,12 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 0 Reducer 2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: string), _col3 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: Group By Operator @@ -1728,13 +1717,23 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reducer 4 + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: string), _col3 (type: string) + Reducer 3 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: string), VALUE._col2 (type: string) diff --git ql/src/test/results/clientpositive/tez/unionDistinct_1.q.out ql/src/test/results/clientpositive/tez/unionDistinct_1.q.out index bcae2fc..306bf08 100644 --- ql/src/test/results/clientpositive/tez/unionDistinct_1.q.out +++ ql/src/test/results/clientpositive/tez/unionDistinct_1.q.out @@ -11104,8 +11104,7 @@ STAGE PLANS: Edges: Map 1 <- Union 2 (CONTAINS) Map 4 <- Union 2 (CONTAINS) - Map 5 <- Reducer 3 (BROADCAST_EDGE) - Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 3 <- Map 5 (BROADCAST_EDGE), Union 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -11168,28 +11167,11 @@ STAGE PLANS: Filter Operator predicate: (key = 97) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 '97' (type: string) - 1 '97' (type: string) - outputColumnNames: _col6 - input vertices: - 1 Reducer 3 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Select Operator - expressions: '97' (type: string), _col6 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: '97' (type: string) + sort order: + + Map-reduce partition columns: '97' (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Group By Operator @@ -11201,12 +11183,28 @@ STAGE PLANS: expressions: _col1 (type: string) outputColumnNames: _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: '97' (type: string) - sort order: + - Map-reduce partition columns: '97' (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 '97' (type: string) + 1 '97' (type: string) + outputColumnNames: _col6 + input vertices: + 0 Map 5 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: '97' (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 diff --git ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out index f0ddc5b..dd56942 100644 --- ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out +++ ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out @@ -4178,7 +4178,7 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 #### A masked pattern was here #### 1000 -Warning: Map Join MAPJOIN[21][bigTable=?] in task 'Map 1' is a cross product +Warning: Map Join MAPJOIN[21][bigTable=?] in task 'Reducer 3' is a cross product PREHOOK: query: -- parent is reduce tasks EXPLAIN select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' PREHOOK: type: QUERY @@ -4193,9 +4193,8 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Reducer 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (BROADCAST_EDGE), Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -4206,26 +4205,10 @@ STAGE PLANS: Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Select Operator Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - input vertices: - 1 Reducer 4 - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Map 3 + Reduce Output Operator + sort order: + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Map 2 Map Operator Tree: TableScan alias: srcpart @@ -4245,7 +4228,36 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + input vertices: + 0 Map 1 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -4260,19 +4272,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized - Reducer 4 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -4280,7 +4279,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[21][bigTable=?] in task 'Map 1' is a cross product +Warning: Map Join MAPJOIN[21][bigTable=?] in task 'Reducer 3' is a cross product PREHOOK: query: select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08' PREHOOK: type: QUERY PREHOOK: Input: default@srcpart @@ -4721,9 +4720,8 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Map 3 <- Map 2 (BROADCAST_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -4759,13 +4757,27 @@ STAGE PLANS: 0 Map 1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE HybridGraceHashJoin: true - Reduce Output Operator - key expressions: '13' (type: string) - sort order: + - Map-reduce partition columns: '13' (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 '13' (type: string) + 1 '13' (type: string) + input vertices: + 1 Map 4 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + HybridGraceHashJoin: true + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Execution mode: vectorized - Map 3 + Map 4 Map Operator Tree: TableScan alias: srcpart_hour @@ -4774,26 +4786,12 @@ STAGE PLANS: Filter Operator predicate: (hr = 13) (type: boolean) Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 '13' (type: string) - 1 '13' (type: string) - input vertices: - 0 Map 2 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - HybridGraceHashJoin: true - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 4 + Reduce Output Operator + key expressions: '13' (type: string) + sort order: + + Map-reduce partition columns: '13' (type: string) + Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0)