diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java index 3d664c1..44b8b19 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorUtils.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.exec.NodeUtils.Function; import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.Statistics; import org.apache.hadoop.mapred.OutputCollector; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -297,4 +298,22 @@ public static int countOperatorsUpstream(Operator start, Set op) { + long cumulativeCardinality = 0L; + for (Operator inputOp : op.getParentOperators()) { + Long inputCardinality = computeCumulativeCardinality(inputOp); + if (inputCardinality == null) { + return null; + } + cumulativeCardinality += inputCardinality; + } + Statistics currInputStat = op.getStatistics(); + if (currInputStat == null) { + LOG.warn("Couldn't get statistics from: " + op); + return null; + } + cumulativeCardinality += currInputStat.getNumRows(); + return cumulativeCardinality; + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java index 00bc193..e4fff29 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java @@ -27,27 +27,21 @@ import java.util.Set; import java.util.Stack; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.AppMasterEventOperator; -import org.apache.hadoop.hive.ql.exec.CommonJoinOperator; import org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator; import org.apache.hadoop.hive.ql.exec.DummyStoreOperator; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.GroupByOperator; import org.apache.hadoop.hive.ql.exec.JoinOperator; -import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.MuxOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.exec.OperatorUtils; -import org.apache.hadoop.hive.ql.exec.PTFOperator; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; import org.apache.hadoop.hive.ql.exec.TezDummyStoreOperator; -import org.apache.hadoop.hive.ql.exec.UDTFOperator; import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; @@ -64,8 +58,8 @@ import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.Statistics; import org.apache.hadoop.util.ReflectionUtils; - -import com.google.common.collect.ImmutableSet; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * ConvertJoinMapJoin is an optimization that replaces a common join @@ -78,16 +72,6 @@ private static final Logger LOG = LoggerFactory.getLogger(ConvertJoinMapJoin.class.getName()); - @SuppressWarnings({ "unchecked", "rawtypes" }) - private static final Set>> COSTLY_OPERATORS = - new ImmutableSet.Builder() - .add(CommonJoinOperator.class) - .add(GroupByOperator.class) - .add(LateralViewJoinOperator.class) - .add(PTFOperator.class) - .add(ReduceSinkOperator.class) - .add(UDTFOperator.class) - .build(); @Override /* @@ -144,11 +128,13 @@ return null; } } + + // We need to calculate back the map join conversion pos with no bucket scaling. + mapJoinConversionPos = getMapJoinConversionPos(joinOp, context, 1); } LOG.info("Convert to non-bucketed map join"); // check if we can convert to map join no bucket scaling. - mapJoinConversionPos = getMapJoinConversionPos(joinOp, context, 1); if (mapJoinConversionPos < 0) { // we are just converting to a common merge join operator. The shuffle // join in map-reduce case. @@ -557,8 +543,8 @@ public int getMapJoinConversionPos(JoinOperator joinOp, OptimizeTezProcContext c HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD); int bigTablePosition = -1; - // number of costly ops (Join, GB, PTF/Windowing, TF) below the big input - int bigInputNumberCostlyOps = -1; + // big input cumulative row count + Long bigInputCumulativeCardinality = -1L; // stats of the big input Statistics bigInputStat = null; @@ -602,18 +588,27 @@ public int getMapJoinConversionPos(JoinOperator joinOp, OptimizeTezProcContext c } } - int currentInputNumberCostlyOps = foundInputNotFittingInMemory ? - -1 : OperatorUtils.countOperatorsUpstream(parentOp, COSTLY_OPERATORS); + long currentInputCumulativeCardinality; + if (foundInputNotFittingInMemory) { + currentInputCumulativeCardinality = -1L; + } else { + Long cardinality = OperatorUtils.computeCumulativeCardinality(parentOp); + if (cardinality == null) { + // We could not get stats, we cannot convert + return -1; + } + currentInputCumulativeCardinality = cardinality; + } // This input is the big table if it is contained in the big candidates set, and either: // 1) we have not chosen a big table yet, or // 2) it has been chosen as the big table above, or - // 3) the number of costly operators for this input is higher, or - // 4) the number of costly operators is equal, but the size is bigger, + // 3) the cumulative cardinality for this input is higher, or + // 4) the cumulative cardinality is equal, but the size is bigger, boolean selectedBigTable = bigTableCandidateSet.contains(pos) && (bigInputStat == null || currentInputNotFittingInMemory || - (!foundInputNotFittingInMemory && (currentInputNumberCostlyOps > bigInputNumberCostlyOps || - (currentInputNumberCostlyOps == bigInputNumberCostlyOps && inputSize > bigInputStat.getDataSize())))); + (!foundInputNotFittingInMemory && (currentInputCumulativeCardinality > bigInputCumulativeCardinality || + (currentInputCumulativeCardinality == bigInputCumulativeCardinality && inputSize > bigInputStat.getDataSize())))); if (bigInputStat != null && selectedBigTable) { // We are replacing the current big table with a new one, thus @@ -633,7 +628,7 @@ public int getMapJoinConversionPos(JoinOperator joinOp, OptimizeTezProcContext c if (selectedBigTable) { bigTablePosition = pos; - bigInputNumberCostlyOps = currentInputNumberCostlyOps; + bigInputCumulativeCardinality = currentInputCumulativeCardinality; bigInputStat = currInputStat; } diff --git ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out index c743edc..8f054f2 100644 --- ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out +++ ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out @@ -344,7 +344,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) + Map 3 <- Reducer 2 (CUSTOM_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -384,15 +385,31 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 0 Reducer 2 + Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap Reducer 2 - Execution mode: uber + Execution mode: llap Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -404,28 +421,12 @@ STAGE PLANS: expressions: _col1 (type: double), _col0 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 1 Map 3 - Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Select Operator - expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double) Stage: Stage-0 Fetch Operator @@ -953,7 +954,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) + Map 3 <- Reducer 2 (CUSTOM_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -993,15 +995,31 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 0 Reducer 2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap Reducer 2 - Execution mode: uber + Execution mode: llap Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -1009,28 +1027,12 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 1 Map 3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Select Operator - expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) Stage: Stage-0 Fetch Operator @@ -1057,7 +1059,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) + Map 3 <- Reducer 2 (CUSTOM_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1091,15 +1094,31 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 0 Reducer 2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap Reducer 2 - Execution mode: uber + Execution mode: llap Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -1107,28 +1126,12 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 1 Map 3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Select Operator - expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out index 729e963..71a9d90 100644 --- ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out +++ ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out @@ -4900,8 +4900,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 3 <- Map 1 (BROADCAST_EDGE), Map 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Map 1 <- Reducer 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -4914,13 +4915,27 @@ STAGE PLANS: expressions: ds (type: string) outputColumnNames: _col0 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 '2008-04-08' (type: string) + input vertices: + 1 Reducer 4 + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Execution mode: llap - Map 2 + Map 3 Map Operator Tree: TableScan alias: srcpart @@ -4937,36 +4952,7 @@ STAGE PLANS: Map-reduce partition columns: '2008-04-08' (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Execution mode: llap - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: '2008-04-08' (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 '2008-04-08' (type: string) - input vertices: - 0 Map 1 - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 4 + Reducer 2 Execution mode: uber Reduce Operator Tree: Group By Operator @@ -4981,6 +4967,36 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: '2008-04-08' (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '2008-04-08' (type: string) + sort order: + + Map-reduce partition columns: '2008-04-08' (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '2008-04-08' (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Dynamic Partitioning Event Operator + Target column: ds (string) + Target Input: srcpart + Partition key expr: ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Target Vertex: Map 1 Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out index 42f21fb..b295ae7 100644 --- ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out @@ -4535,8 +4535,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 3 <- Map 1 (BROADCAST_EDGE), Map 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Map 1 <- Reducer 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -4549,13 +4550,27 @@ STAGE PLANS: expressions: ds (type: string) outputColumnNames: _col0 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 '2008-04-08' (type: string) + input vertices: + 1 Reducer 4 + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Execution mode: llap - Map 2 + Map 3 Map Operator Tree: TableScan alias: srcpart @@ -4572,36 +4587,7 @@ STAGE PLANS: Map-reduce partition columns: '2008-04-08' (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Execution mode: llap - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - keys: '2008-04-08' (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 '2008-04-08' (type: string) - input vertices: - 0 Map 1 - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 4 + Reducer 2 Execution mode: vectorized, uber Reduce Operator Tree: Group By Operator @@ -4616,6 +4602,36 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: '2008-04-08' (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '2008-04-08' (type: string) + sort order: + + Map-reduce partition columns: '2008-04-08' (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '2008-04-08' (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Dynamic Partitioning Event Operator + Target column: ds (string) + Target Input: srcpart + Partition key expr: ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Target Vertex: Map 1 Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/tez/auto_sortmerge_join_10.q.out ql/src/test/results/clientpositive/tez/auto_sortmerge_join_10.q.out index 5df187d..8bd9dc8 100644 --- ql/src/test/results/clientpositive/tez/auto_sortmerge_join_10.q.out +++ ql/src/test/results/clientpositive/tez/auto_sortmerge_join_10.q.out @@ -239,8 +239,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -256,6 +256,23 @@ STAGE PLANS: mode: final outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Map 2 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -263,7 +280,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) input vertices: - 1 Map 3 + 0 Map 1 Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE HybridGraceHashJoin: true Group By Operator @@ -275,24 +292,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Map 3 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 3 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git ql/src/test/results/clientpositive/tez/bucket_map_join_tez1.q.out ql/src/test/results/clientpositive/tez/bucket_map_join_tez1.q.out index 882eff3..2e10157 100644 --- ql/src/test/results/clientpositive/tez/bucket_map_join_tez1.q.out +++ ql/src/test/results/clientpositive/tez/bucket_map_join_tez1.q.out @@ -338,7 +338,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) + Map 3 <- Reducer 2 (CUSTOM_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -377,12 +378,28 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 0 Reducer 2 + Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 2 Reduce Operator Tree: Group By Operator @@ -395,28 +412,12 @@ STAGE PLANS: expressions: _col1 (type: double), _col0 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 1 Map 3 - Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Select Operator - expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double) Stage: Stage-0 Fetch Operator @@ -930,7 +931,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) + Map 3 <- Reducer 2 (CUSTOM_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -969,12 +971,28 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 0 Reducer 2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 2 Reduce Operator Tree: Group By Operator @@ -983,28 +1001,12 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 1 Map 3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Select Operator - expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) Stage: Stage-0 Fetch Operator @@ -1031,7 +1033,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) + Map 3 <- Reducer 2 (CUSTOM_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1064,12 +1067,28 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 0 Reducer 2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 2 Reduce Operator Tree: Group By Operator @@ -1078,28 +1097,12 @@ STAGE PLANS: mode: complete outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 1 Map 3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Select Operator - expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 1352 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/tez/cross_product_check_2.q.out ql/src/test/results/clientpositive/tez/cross_product_check_2.q.out index f00be43..09f2fc5 100644 --- ql/src/test/results/clientpositive/tez/cross_product_check_2.q.out +++ ql/src/test/results/clientpositive/tez/cross_product_check_2.q.out @@ -91,7 +91,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[22][bigTable=?] in task 'Map 1' is a cross product +Warning: Map Join MAPJOIN[22][bigTable=?] in task 'Map 3' is a cross product PREHOOK: query: explain select * from B d1 join B d2 on d1.key = d2.key join A PREHOOK: type: QUERY POSTHOOK: query: explain select * from B d1 join B d2 on d1.key = d2.key join A @@ -105,7 +105,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) + Map 1 <- Map 2 (BROADCAST_EDGE) + Map 3 <- Map 1 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -131,23 +132,10 @@ STAGE PLANS: 1 Map 2 Statistics: Num rows: 11 Data size: 105 Basic stats: COMPLETE Column stats: NONE HybridGraceHashJoin: true - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - input vertices: - 1 Map 3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + sort order: + Statistics: Num rows: 11 Data size: 105 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) Map 2 Map Operator Tree: TableScan @@ -175,10 +163,23 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + input vertices: + 0 Map 1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -186,7 +187,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[27][bigTable=?] in task 'Reducer 3' is a cross product +Warning: Map Join MAPJOIN[27][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: explain select * from A join (select d1.key from B d1 join B d2 on d1.key = d2.key @@ -206,8 +207,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: + Map 1 <- Reducer 3 (BROADCAST_EDGE) Map 2 <- Map 4 (BROADCAST_EDGE) - Reducer 3 <- Map 1 (BROADCAST_EDGE), Map 2 (SIMPLE_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -219,10 +221,23 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Reducer 3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Map 2 Map Operator Tree: TableScan @@ -280,23 +295,10 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 5 Data size: 47 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2 - input vertices: - 0 Map 1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + sort order: + Statistics: Num rows: 5 Data size: 47 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Stage: Stage-0 Fetch Operator @@ -305,7 +307,7 @@ STAGE PLANS: ListSink Warning: Map Join MAPJOIN[21][bigTable=?] in task 'Map 2' is a cross product -Warning: Map Join MAPJOIN[22][bigTable=?] in task 'Reducer 3' is a cross product +Warning: Map Join MAPJOIN[22][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: explain select * from A join (select d1.key from B d1 join B d2 where 1 = 1 group by d1.key) od1 PREHOOK: type: QUERY POSTHOOK: query: explain select * from A join (select d1.key from B d1 join B d2 where 1 = 1 group by d1.key) od1 @@ -319,8 +321,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: + Map 1 <- Reducer 3 (BROADCAST_EDGE) Map 2 <- Map 4 (BROADCAST_EDGE) - Reducer 3 <- Map 1 (BROADCAST_EDGE), Map 2 (SIMPLE_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -332,10 +335,23 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Reducer 3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Map 2 Map Operator Tree: TableScan @@ -382,23 +398,10 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 5 Data size: 47 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2 - input vertices: - 0 Map 1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + sort order: + Statistics: Num rows: 5 Data size: 47 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Stage: Stage-0 Fetch Operator @@ -406,7 +409,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[31][bigTable=?] in task 'Reducer 4' is a cross product +Warning: Map Join MAPJOIN[31][bigTable=?] in task 'Reducer 2' is a cross product PREHOOK: query: explain select * from (select A.key from A group by key) ss join (select d1.key from B d1 join B d2 on d1.key = d2.key where 1 = 1 group by d1.key) od1 @@ -425,8 +428,8 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 3 <- Map 5 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 2 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (BROADCAST_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -505,17 +508,6 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reducer 4 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 47 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -524,7 +516,7 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1 input vertices: - 0 Reducer 2 + 1 Reducer 4 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -533,6 +525,17 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 47 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 5 Data size: 47 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/tez/dynamic_partition_pruning.q.out ql/src/test/results/clientpositive/tez/dynamic_partition_pruning.q.out index fb16b40..f0b3904 100644 --- ql/src/test/results/clientpositive/tez/dynamic_partition_pruning.q.out +++ ql/src/test/results/clientpositive/tez/dynamic_partition_pruning.q.out @@ -4752,8 +4752,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 3 <- Map 1 (BROADCAST_EDGE), Map 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Map 1 <- Reducer 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -4766,12 +4767,26 @@ STAGE PLANS: expressions: ds (type: string) outputColumnNames: _col0 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Map 2 + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 '2008-04-08' (type: string) + input vertices: + 1 Reducer 4 + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Map 3 Map Operator Tree: TableScan alias: srcpart @@ -4787,35 +4802,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: '2008-04-08' (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Reduce Operator Tree: - Group By Operator - keys: '2008-04-08' (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 '2008-04-08' (type: string) - input vertices: - 0 Map 1 - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 4 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -4829,6 +4816,35 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + keys: '2008-04-08' (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '2008-04-08' (type: string) + sort order: + + Map-reduce partition columns: '2008-04-08' (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '2008-04-08' (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Dynamic Partitioning Event Operator + Target column: ds (string) + Target Input: srcpart + Partition key expr: ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Target Vertex: Map 1 Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/tez/explainuser_2.q.out ql/src/test/results/clientpositive/tez/explainuser_2.q.out index f2b225d..db1c5b5 100644 --- ql/src/test/results/clientpositive/tez/explainuser_2.q.out +++ ql/src/test/results/clientpositive/tez/explainuser_2.q.out @@ -1018,9 +1018,9 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Map 2 (BROADCAST_EDGE) Map 10 <- Map 9 (BROADCAST_EDGE) -Map 3 <- Map 1 (BROADCAST_EDGE), Map 10 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Map 7 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE) +Map 2 <- Map 1 (BROADCAST_EDGE) +Map 3 <- Map 10 (BROADCAST_EDGE), Map 2 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Map 7 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE) Reducer 4 <- Map 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) @@ -1049,26 +1049,26 @@ Stage-0 Output:["_col2","_col12","_col20","_col13","_col21","_col3"] Map Join Operator [MAPJOIN_97] (rows=1610 width=10) Conds:RS_44._col1, _col3=SEL_40._col15, _col17(Inner),HybridGraceHashJoin:true,Output:["_col2","_col3","_col12","_col13","_col20","_col21"] - <-Map 1 [BROADCAST_EDGE] + <-Map 2 [BROADCAST_EDGE] BROADCAST [RS_44] PartitionCols:_col1, _col3 Map Join Operator [MAPJOIN_91] (rows=275 width=10) - Conds:SEL_2._col0=RS_42._col0(Inner),HybridGraceHashJoin:true,Output:["_col1","_col2","_col3"] - <-Map 2 [BROADCAST_EDGE] - BROADCAST [RS_42] + Conds:RS_41._col0=SEL_5._col0(Inner),HybridGraceHashJoin:true,Output:["_col1","_col2","_col3"] + <-Map 1 [BROADCAST_EDGE] + BROADCAST [RS_41] PartitionCols:_col0 - Select Operator [SEL_5] (rows=250 width=10) - Output:["_col0"] - Filter Operator [FIL_84] (rows=250 width=10) - predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) - TableScan [TS_3] (rows=500 width=10) - default@src,d1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_2] (rows=170 width=34) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_83] (rows=170 width=34) - predicate:((v2 is not null and v3 is not null) and k1 is not null) - TableScan [TS_0] (rows=170 width=34) - default@cs,cs,Tbl:COMPLETE,Col:NONE,Output:["k1","v2","k3","v3"] + Select Operator [SEL_2] (rows=170 width=34) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_83] (rows=170 width=34) + predicate:((v2 is not null and v3 is not null) and k1 is not null) + TableScan [TS_0] (rows=170 width=34) + default@cs,cs,Tbl:COMPLETE,Col:NONE,Output:["k1","v2","k3","v3"] + <-Select Operator [SEL_5] (rows=250 width=10) + Output:["_col0"] + Filter Operator [FIL_84] (rows=250 width=10) + predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) + TableScan [TS_3] (rows=500 width=10) + default@src,d1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] <-Select Operator [SEL_40] (rows=1464 width=10) Output:["_col14","_col15","_col17","_col6","_col7"] Map Join Operator [MAPJOIN_96] (rows=1464 width=10) diff --git ql/src/test/results/clientpositive/tez/vector_groupby_mapjoin.q.out ql/src/test/results/clientpositive/tez/vector_groupby_mapjoin.q.out index 037c338..5c3e198 100644 --- ql/src/test/results/clientpositive/tez/vector_groupby_mapjoin.q.out +++ ql/src/test/results/clientpositive/tez/vector_groupby_mapjoin.q.out @@ -1,4 +1,4 @@ -Warning: Map Join MAPJOIN[28][bigTable=?] in task 'Reducer 3' is a cross product +Warning: Map Join MAPJOIN[28][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: -- HIVE-12738 -- We are checking if a MapJoin after a GroupBy will work properly. explain select * @@ -18,18 +18,19 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 3 <- Map 1 (BROADCAST_EDGE), Map 2 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE) -Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +Map 1 <- Map 5 (BROADCAST_EDGE), Reducer 4 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 4 <- Map 3 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 - Reducer 4 vectorized + Reducer 2 vectorized File Output Operator [FS_34] Select Operator [OP_33] (rows=302 width=10) Output:["_col0","_col1"] - <-Reducer 3 [SIMPLE_EDGE] vectorized + <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_22] Select Operator [SEL_21] (rows=302 width=10) Output:["_col0","_col1"] @@ -46,28 +47,28 @@ Stage-0 default@src,src,Tbl:COMPLETE,Col:NONE,Output:["key"] <-Map Join Operator [MAPJOIN_28] (rows=550 width=10) Conds:(Inner),Output:["_col0","_col1"] - <-Map 1 [BROADCAST_EDGE] - BROADCAST [RS_14] - Select Operator [SEL_1] (rows=500 width=10) - Output:["_col0","_col1"] - TableScan [TS_0] (rows=500 width=10) - default@src,src,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_10] (rows=1 width=8) - Filter Operator [FIL_9] (rows=1 width=8) - predicate:(_col0 = 0) - Group By Operator [OP_32] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 2 [SIMPLE_EDGE] - SHUFFLE [RS_6] - Group By Operator [GBY_5] (rows=1 width=8) - Output:["_col0"],aggregations:["count()"] - Select Operator [SEL_4] (rows=250 width=10) - Filter Operator [FIL_26] (rows=250 width=10) - predicate:key is null - TableScan [TS_2] (rows=500 width=10) - default@src,src,Tbl:COMPLETE,Col:NONE,Output:["key"] + <-Reducer 4 [BROADCAST_EDGE] vectorized + BROADCAST [RS_15] + Select Operator [SEL_10] (rows=1 width=8) + Filter Operator [FIL_9] (rows=1 width=8) + predicate:(_col0 = 0) + Group By Operator [OP_32] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Map 3 [SIMPLE_EDGE] + SHUFFLE [RS_6] + Group By Operator [GBY_5] (rows=1 width=8) + Output:["_col0"],aggregations:["count()"] + Select Operator [SEL_4] (rows=250 width=10) + Filter Operator [FIL_26] (rows=250 width=10) + predicate:key is null + TableScan [TS_2] (rows=500 width=10) + default@src,src,Tbl:COMPLETE,Col:NONE,Output:["key"] + <-Select Operator [SEL_1] (rows=500 width=10) + Output:["_col0","_col1"] + TableScan [TS_0] (rows=500 width=10) + default@src,src,Tbl:COMPLETE,Col:NONE,Output:["key","value"] -Warning: Map Join MAPJOIN[28][bigTable=?] in task 'Reducer 3' is a cross product +Warning: Map Join MAPJOIN[28][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: select * from src where not key in diff --git ql/src/test/results/clientpositive/tez/vector_inner_join.q.out ql/src/test/results/clientpositive/tez/vector_inner_join.q.out index 5ff2cd1..45554f7 100644 --- ql/src/test/results/clientpositive/tez/vector_inner_join.q.out +++ ql/src/test/results/clientpositive/tez/vector_inner_join.q.out @@ -47,7 +47,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) + Map 1 <- Map 2 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -62,24 +62,6 @@ STAGE PLANS: expressions: c (type: int) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized - Map 2 - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (a > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: a (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -88,7 +70,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1 input vertices: - 0 Map 1 + 1 Map 2 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE HybridGraceHashJoin: true Select Operator @@ -103,6 +85,24 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Map 2 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (a > 2) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: a (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -352,7 +352,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -367,6 +367,25 @@ STAGE PLANS: expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Execution mode: vectorized + Map 2 + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (c > 2) (type: boolean) + Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c (type: int), v2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -375,7 +394,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 1 Map 2 + 0 Map 1 Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE HybridGraceHashJoin: true File Output Operator @@ -386,25 +405,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized - Map 2 - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (c > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c (type: int), v2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -708,7 +708,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -723,6 +723,25 @@ STAGE PLANS: expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Execution mode: vectorized + Map 2 + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (c > 2) (type: boolean) + Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c (type: int), v2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -731,7 +750,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col2, _col3 input vertices: - 1 Map 2 + 0 Map 1 Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE HybridGraceHashJoin: true Select Operator @@ -746,25 +765,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized - Map 2 - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (c > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c (type: int), v2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -798,7 +798,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -813,6 +813,25 @@ STAGE PLANS: expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Execution mode: vectorized + Map 2 + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (c > 2) (type: boolean) + Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c (type: int), v2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -821,7 +840,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 input vertices: - 1 Map 2 + 0 Map 1 Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE HybridGraceHashJoin: true Select Operator @@ -836,25 +855,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized - Map 2 - Map Operator Tree: - TableScan - alias: t2 - Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (c > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: c (type: int), v2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: vectorized Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out index 46e17e0..9cdda9d 100644 --- ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out +++ ql/src/test/results/clientpositive/tez/vectorized_dynamic_partition_pruning.q.out @@ -4469,8 +4469,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 3 <- Map 1 (BROADCAST_EDGE), Map 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Map 1 <- Reducer 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -4483,12 +4484,26 @@ STAGE PLANS: expressions: ds (type: string) outputColumnNames: _col0 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Map 2 + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 '2008-04-08' (type: string) + input vertices: + 1 Reducer 4 + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Map 3 Map Operator Tree: TableScan alias: srcpart @@ -4504,36 +4519,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: '2008-04-08' (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - keys: '2008-04-08' (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 '2008-04-08' (type: string) - input vertices: - 0 Map 1 - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 4 + Reducer 2 Execution mode: vectorized Reduce Operator Tree: Group By Operator @@ -4548,6 +4534,36 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: '2008-04-08' (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '2008-04-08' (type: string) + sort order: + + Map-reduce partition columns: '2008-04-08' (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: '2008-04-08' (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Dynamic Partitioning Event Operator + Target column: ds (string) + Target Input: srcpart + Partition key expr: ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Target Vertex: Map 1 Stage: Stage-0 Fetch Operator