Index: ql/src/test/queries/clientpositive/dynamic_partition_pruning_2.q =================================================================== --- ql/src/test/queries/clientpositive/dynamic_partition_pruning_2.q (revision 1627853) +++ ql/src/test/queries/clientpositive/dynamic_partition_pruning_2.q (working copy) @@ -19,6 +19,9 @@ load data local inpath '../../data/files/agg_01-p2.txt' into table agg_01 partition (dim_shops_id=2); load data local inpath '../../data/files/agg_01-p3.txt' into table agg_01 partition (dim_shops_id=3); +analyze table dim_shops compute statistics; +analyze table agg_01 partition (dim_shops_id) compute statistics; + select * from dim_shops; select * from agg_01; @@ -40,6 +43,73 @@ GROUP BY d1.label ORDER BY d1.label; +set hive.tez.dynamic.partition.pruning.max.event.size=1000000; +set hive.tez.dynamic.partition.pruning.max.data.size=1; + +EXPLAIN SELECT d1.label, count(*), sum(agg.amount) +FROM agg_01 agg, +dim_shops d1 +WHERE agg.dim_shops_id = d1.id +and +d1.label in ('foo', 'bar') +GROUP BY d1.label +ORDER BY d1.label; + +SELECT d1.label, count(*), sum(agg.amount) +FROM agg_01 agg, +dim_shops d1 +WHERE agg.dim_shops_id = d1.id +and +d1.label in ('foo', 'bar') +GROUP BY d1.label +ORDER BY d1.label; + +EXPLAIN SELECT d1.label +FROM agg_01 agg, +dim_shops d1 +WHERE agg.dim_shops_id = d1.id; + +SELECT d1.label +FROM agg_01 agg, +dim_shops d1 +WHERE agg.dim_shops_id = d1.id; + +EXPLAIN SELECT agg.amount +FROM agg_01 agg, +dim_shops d1 +WHERE agg.dim_shops_id = d1.id +and agg.dim_shops_id = 1; + +SELECT agg.amount +FROM agg_01 agg, +dim_shops d1 +WHERE agg.dim_shops_id = d1.id +and agg.dim_shops_id = 1; + +set hive.tez.dynamic.partition.pruning.max.event.size=1; +set hive.tez.dynamic.partition.pruning.max.data.size=1000000; + +EXPLAIN SELECT d1.label, count(*), sum(agg.amount) +FROM agg_01 agg, +dim_shops d1 +WHERE agg.dim_shops_id = d1.id +and +d1.label in ('foo', 'bar') +GROUP BY d1.label +ORDER BY d1.label; + +SELECT d1.label, count(*), sum(agg.amount) +FROM agg_01 agg, +dim_shops d1 +WHERE agg.dim_shops_id = d1.id +and +d1.label in ('foo', 'bar') +GROUP BY d1.label +ORDER BY d1.label; + +set hive.tez.dynamic.partition.pruning.max.event.size=100000; +set hive.tez.dynamic.partition.pruning.max.data.size=1000000; + EXPLAIN SELECT amount FROM agg_01, dim_shops WHERE dim_shops_id = id AND label = 'foo' UNION ALL @@ -47,4 +117,4 @@ SELECT amount FROM agg_01, dim_shops WHERE dim_shops_id = id AND label = 'foo' UNION ALL -SELECT amount FROM agg_01, dim_shops WHERE dim_shops_id = id AND label = 'bar'; \ No newline at end of file +SELECT amount FROM agg_01, dim_shops WHERE dim_shops_id = id AND label = 'bar'; Index: ql/src/test/results/clientpositive/tez/dynamic_partition_pruning_2.q.out =================================================================== --- ql/src/test/results/clientpositive/tez/dynamic_partition_pruning_2.q.out (revision 1627853) +++ ql/src/test/results/clientpositive/tez/dynamic_partition_pruning_2.q.out (working copy) @@ -67,6 +67,34 @@ POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@agg_01@dim_shops_id=3 +PREHOOK: query: analyze table dim_shops compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@dim_shops +PREHOOK: Output: default@dim_shops +POSTHOOK: query: analyze table dim_shops compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dim_shops +POSTHOOK: Output: default@dim_shops +PREHOOK: query: analyze table agg_01 partition (dim_shops_id) compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@agg_01 +PREHOOK: Input: default@agg_01@dim_shops_id=1 +PREHOOK: Input: default@agg_01@dim_shops_id=2 +PREHOOK: Input: default@agg_01@dim_shops_id=3 +PREHOOK: Output: default@agg_01 +PREHOOK: Output: default@agg_01@dim_shops_id=1 +PREHOOK: Output: default@agg_01@dim_shops_id=2 +PREHOOK: Output: default@agg_01@dim_shops_id=3 +POSTHOOK: query: analyze table agg_01 partition (dim_shops_id) compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@agg_01 +POSTHOOK: Input: default@agg_01@dim_shops_id=1 +POSTHOOK: Input: default@agg_01@dim_shops_id=2 +POSTHOOK: Input: default@agg_01@dim_shops_id=3 +POSTHOOK: Output: default@agg_01 +POSTHOOK: Output: default@agg_01@dim_shops_id=1 +POSTHOOK: Output: default@agg_01@dim_shops_id=2 +POSTHOOK: Output: default@agg_01@dim_shops_id=3 PREHOOK: query: select * from dim_shops PREHOOK: type: QUERY PREHOOK: Input: default@dim_shops @@ -137,29 +165,29 @@ TableScan alias: d1 filterExpr: (id is not null and (label) IN ('foo', 'bar')) (type: boolean) - Statistics: Num rows: 0 Data size: 18 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (id is not null and (label) IN ('foo', 'bar')) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: id (type: int) sort order: + Map-reduce partition columns: id (type: int) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE value expressions: label (type: string) Select Operator expressions: id (type: int) outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE Dynamic Partitioning Event Operator Target Input: agg Partition key expr: dim_shops_id - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE Target column: dim_shops_id Target Vertex: Map 2 Map 2 @@ -167,7 +195,7 @@ TableScan alias: agg filterExpr: dim_shops_id is not null (type: boolean) - Statistics: Num rows: 0 Data size: 36 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 9 Data size: 27 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -180,25 +208,552 @@ outputColumnNames: _col0, _col1, _col5, _col6 input vertices: 1 Map 1 - Statistics: Num rows: 0 Data size: 39 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((_col1 = _col5) and (_col6) IN ('foo', 'bar')) (type: boolean) + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col6 (type: string), _col0 (type: decimal(10,0)) + outputColumnNames: _col6, _col0 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(), sum(_col0) + keys: _col6 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: decimal(20,0)) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: decimal(20,0)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: decimal(20,0)) + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: decimal(20,0)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT d1.label, count(*), sum(agg.amount) +FROM agg_01 agg, +dim_shops d1 +WHERE agg.dim_shops_id = d1.id +and +d1.label in ('foo', 'bar') +GROUP BY d1.label +ORDER BY d1.label +PREHOOK: type: QUERY +PREHOOK: Input: default@agg_01 +PREHOOK: Input: default@agg_01@dim_shops_id=1 +PREHOOK: Input: default@agg_01@dim_shops_id=2 +PREHOOK: Input: default@agg_01@dim_shops_id=3 +PREHOOK: Input: default@dim_shops +#### A masked pattern was here #### +POSTHOOK: query: SELECT d1.label, count(*), sum(agg.amount) +FROM agg_01 agg, +dim_shops d1 +WHERE agg.dim_shops_id = d1.id +and +d1.label in ('foo', 'bar') +GROUP BY d1.label +ORDER BY d1.label +POSTHOOK: type: QUERY +POSTHOOK: Input: default@agg_01 +POSTHOOK: Input: default@agg_01@dim_shops_id=1 +POSTHOOK: Input: default@agg_01@dim_shops_id=2 +POSTHOOK: Input: default@agg_01@dim_shops_id=3 +POSTHOOK: Input: default@dim_shops +#### A masked pattern was here #### +bar 3 15 +foo 3 6 +PREHOOK: query: EXPLAIN SELECT d1.label, count(*), sum(agg.amount) +FROM agg_01 agg, +dim_shops d1 +WHERE agg.dim_shops_id = d1.id +and +d1.label in ('foo', 'bar') +GROUP BY d1.label +ORDER BY d1.label +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT d1.label, count(*), sum(agg.amount) +FROM agg_01 agg, +dim_shops d1 +WHERE agg.dim_shops_id = d1.id +and +d1.label in ('foo', 'bar') +GROUP BY d1.label +ORDER BY d1.label +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: d1 + filterExpr: (id is not null and (label) IN ('foo', 'bar')) (type: boolean) + Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (id is not null and (label) IN ('foo', 'bar')) (type: boolean) + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: id (type: int) + sort order: + + Map-reduce partition columns: id (type: int) + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + value expressions: label (type: string) + Map 2 + Map Operator Tree: + TableScan + alias: agg + filterExpr: dim_shops_id is not null (type: boolean) + Statistics: Num rows: 9 Data size: 27 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {amount} {dim_shops_id} + 1 {id} {label} + keys: + 0 dim_shops_id (type: int) + 1 id (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 1 + Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col1 = _col5) and (_col6) IN ('foo', 'bar')) (type: boolean) + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col6 (type: string), _col0 (type: decimal(10,0)) + outputColumnNames: _col6, _col0 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(), sum(_col0) + keys: _col6 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: decimal(20,0)) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: decimal(20,0)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: decimal(20,0)) + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: decimal(20,0)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT d1.label, count(*), sum(agg.amount) +FROM agg_01 agg, +dim_shops d1 +WHERE agg.dim_shops_id = d1.id +and +d1.label in ('foo', 'bar') +GROUP BY d1.label +ORDER BY d1.label +PREHOOK: type: QUERY +PREHOOK: Input: default@agg_01 +PREHOOK: Input: default@agg_01@dim_shops_id=1 +PREHOOK: Input: default@agg_01@dim_shops_id=2 +PREHOOK: Input: default@agg_01@dim_shops_id=3 +PREHOOK: Input: default@dim_shops +#### A masked pattern was here #### +POSTHOOK: query: SELECT d1.label, count(*), sum(agg.amount) +FROM agg_01 agg, +dim_shops d1 +WHERE agg.dim_shops_id = d1.id +and +d1.label in ('foo', 'bar') +GROUP BY d1.label +ORDER BY d1.label +POSTHOOK: type: QUERY +POSTHOOK: Input: default@agg_01 +POSTHOOK: Input: default@agg_01@dim_shops_id=1 +POSTHOOK: Input: default@agg_01@dim_shops_id=2 +POSTHOOK: Input: default@agg_01@dim_shops_id=3 +POSTHOOK: Input: default@dim_shops +#### A masked pattern was here #### +bar 3 15 +foo 3 6 +PREHOOK: query: EXPLAIN SELECT d1.label +FROM agg_01 agg, +dim_shops d1 +WHERE agg.dim_shops_id = d1.id +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT d1.label +FROM agg_01 agg, +dim_shops d1 +WHERE agg.dim_shops_id = d1.id +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: d1 + filterExpr: id is not null (type: boolean) + Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: id (type: int) + sort order: + + Map-reduce partition columns: id (type: int) + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: label (type: string) + Map 2 + Map Operator Tree: + TableScan + alias: agg + filterExpr: dim_shops_id is not null (type: boolean) + Statistics: Num rows: 9 Data size: 27 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {dim_shops_id} + 1 {id} {label} + keys: + 0 dim_shops_id (type: int) + 1 id (type: int) + outputColumnNames: _col1, _col5, _col6 + input vertices: + 1 Map 1 + Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col1 = _col5) (type: boolean) + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col6 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT d1.label +FROM agg_01 agg, +dim_shops d1 +WHERE agg.dim_shops_id = d1.id +PREHOOK: type: QUERY +PREHOOK: Input: default@agg_01 +PREHOOK: Input: default@agg_01@dim_shops_id=1 +PREHOOK: Input: default@agg_01@dim_shops_id=2 +PREHOOK: Input: default@agg_01@dim_shops_id=3 +PREHOOK: Input: default@dim_shops +#### A masked pattern was here #### +POSTHOOK: query: SELECT d1.label +FROM agg_01 agg, +dim_shops d1 +WHERE agg.dim_shops_id = d1.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@agg_01 +POSTHOOK: Input: default@agg_01@dim_shops_id=1 +POSTHOOK: Input: default@agg_01@dim_shops_id=2 +POSTHOOK: Input: default@agg_01@dim_shops_id=3 +POSTHOOK: Input: default@dim_shops +#### A masked pattern was here #### +foo +foo +foo +bar +bar +bar +baz +baz +baz +PREHOOK: query: EXPLAIN SELECT agg.amount +FROM agg_01 agg, +dim_shops d1 +WHERE agg.dim_shops_id = d1.id +and agg.dim_shops_id = 1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT agg.amount +FROM agg_01 agg, +dim_shops d1 +WHERE agg.dim_shops_id = d1.id +and agg.dim_shops_id = 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: d1 + filterExpr: (id is not null and (id = 1)) (type: boolean) + Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (id is not null and (id = 1)) (type: boolean) + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: 1 (type: int) + sort order: + + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Map 2 + Map Operator Tree: + TableScan + alias: agg + filterExpr: (dim_shops_id is not null and (dim_shops_id = 1)) (type: boolean) + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {amount} {dim_shops_id} + 1 + keys: + 0 dim_shops_id (type: int) + 1 1 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 1 + Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col1 = 1) and (_col1 = 1)) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator + expressions: _col0 (type: decimal(10,0)) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT agg.amount +FROM agg_01 agg, +dim_shops d1 +WHERE agg.dim_shops_id = d1.id +and agg.dim_shops_id = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@agg_01 +PREHOOK: Input: default@agg_01@dim_shops_id=1 +PREHOOK: Input: default@dim_shops +#### A masked pattern was here #### +POSTHOOK: query: SELECT agg.amount +FROM agg_01 agg, +dim_shops d1 +WHERE agg.dim_shops_id = d1.id +and agg.dim_shops_id = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@agg_01 +POSTHOOK: Input: default@agg_01@dim_shops_id=1 +POSTHOOK: Input: default@dim_shops +#### A masked pattern was here #### +1 +2 +3 +PREHOOK: query: EXPLAIN SELECT d1.label, count(*), sum(agg.amount) +FROM agg_01 agg, +dim_shops d1 +WHERE agg.dim_shops_id = d1.id +and +d1.label in ('foo', 'bar') +GROUP BY d1.label +ORDER BY d1.label +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT d1.label, count(*), sum(agg.amount) +FROM agg_01 agg, +dim_shops d1 +WHERE agg.dim_shops_id = d1.id +and +d1.label in ('foo', 'bar') +GROUP BY d1.label +ORDER BY d1.label +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: d1 + filterExpr: (id is not null and (label) IN ('foo', 'bar')) (type: boolean) + Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (id is not null and (label) IN ('foo', 'bar')) (type: boolean) + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: id (type: int) + sort order: + + Map-reduce partition columns: id (type: int) + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + value expressions: label (type: string) + Select Operator + expressions: id (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Dynamic Partitioning Event Operator + Target Input: agg + Partition key expr: dim_shops_id + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Target column: dim_shops_id + Target Vertex: Map 2 + Map 2 + Map Operator Tree: + TableScan + alias: agg + filterExpr: dim_shops_id is not null (type: boolean) + Statistics: Num rows: 9 Data size: 27 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {amount} {dim_shops_id} + 1 {id} {label} + keys: + 0 dim_shops_id (type: int) + 1 id (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 1 + Statistics: Num rows: 9 Data size: 29 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col1 = _col5) and (_col6) IN ('foo', 'bar')) (type: boolean) + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Select Operator expressions: _col6 (type: string), _col0 (type: decimal(10,0)) outputColumnNames: _col6, _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(), sum(_col0) keys: _col6 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(20,0)) Reducer 3 Reduce Operator Tree: @@ -207,15 +762,15 @@ keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: decimal(20,0)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(20,0)) Reducer 4 Reduce Operator Tree: @@ -222,10 +777,10 @@ Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: decimal(20,0)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -296,28 +851,28 @@ TableScan alias: dim_shops filterExpr: (id is not null and (label = 'bar')) (type: boolean) - Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (id is not null and (label = 'bar')) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: id (type: int) sort order: + Map-reduce partition columns: id (type: int) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: id (type: int) outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE Dynamic Partitioning Event Operator Target Input: agg_01 Partition key expr: dim_shops_id - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE Target column: dim_shops_id Target Vertex: Map 3 Map 2 @@ -325,28 +880,28 @@ TableScan alias: dim_shops filterExpr: (id is not null and (label = 'foo')) (type: boolean) - Statistics: Num rows: 2 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 15 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (id is not null and (label = 'foo')) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: id (type: int) sort order: + Map-reduce partition columns: id (type: int) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: id (type: int) outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE Dynamic Partitioning Event Operator Target Input: agg_01 Partition key expr: dim_shops_id - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE Target column: dim_shops_id Target Vertex: Map 5 Map 3