diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index 1795ae5626..6f381d58ee 100644 --- ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -269,11 +269,13 @@ private static Statistics collectStatistics(HiveConf conf, PrunedPartitionList p if (needColStats) { colStats = getTableColumnStats(table, schema, neededColumns, colStatsCache, fetchColStats); - estimateStatsForMissingCols(neededColumns, colStats, table, conf, nr, schema); - // we should have stats for all columns (estimated or actual) - assert (neededColumns.size() == colStats.size()); - long betterDS = getDataSizeFromColumnStats(nr, colStats); - ds = (betterDS < 1 || colStats.isEmpty()) ? ds : betterDS; + if (estimateStats) { + estimateStatsForMissingCols(neededColumns, colStats, table, conf, nr, schema); + // we should have stats for all columns (estimated or actual) + assert (neededColumns.size() == colStats.size()); + long betterDS = getDataSizeFromColumnStats(nr, colStats); + ds = (betterDS < 1 || colStats.isEmpty()) ? ds : betterDS; + } } stats = new Statistics(nr, ds, numErasureCodedFiles); diff --git ql/src/test/queries/clientpositive/cbo_stats_estimation.q ql/src/test/queries/clientpositive/cbo_stats_estimation.q new file mode 100644 index 0000000000..80908f8a35 --- /dev/null +++ ql/src/test/queries/clientpositive/cbo_stats_estimation.q @@ -0,0 +1,11 @@ +CREATE TABLE claims(claim_rec_id bigint, claim_invoice_num string, typ_c int); +ALTER TABLE claims UPDATE STATISTICS set ('numRows'='1154941534','rawDataSize'='1135307527922'); + + +SET hive.stats.estimate=false; + +EXPLAIN EXTENDED SELECT count(1) FROM claims WHERE typ_c=3; + +SET hive.stats.ndv.estimate.percent=5e-7; + +EXPLAIN EXTENDED SELECT count(1) FROM claims WHERE typ_c=3; diff --git ql/src/test/results/clientpositive/cbo_stats_estimation.q.out ql/src/test/results/clientpositive/cbo_stats_estimation.q.out new file mode 100644 index 0000000000..389a9bc1cc --- /dev/null +++ ql/src/test/results/clientpositive/cbo_stats_estimation.q.out @@ -0,0 +1,278 @@ +PREHOOK: query: CREATE TABLE claims(claim_rec_id bigint, claim_invoice_num string, typ_c int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@claims +POSTHOOK: query: CREATE TABLE claims(claim_rec_id bigint, claim_invoice_num string, typ_c int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@claims +PREHOOK: query: ALTER TABLE claims UPDATE STATISTICS set ('numRows'='1154941534','rawDataSize'='1135307527922') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@claims +PREHOOK: Output: default@claims +POSTHOOK: query: ALTER TABLE claims UPDATE STATISTICS set ('numRows'='1154941534','rawDataSize'='1135307527922') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@claims +POSTHOOK: Output: default@claims +PREHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM claims WHERE typ_c=3 +PREHOOK: type: QUERY +PREHOOK: Input: default@claims +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM claims WHERE typ_c=3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@claims +#### A masked pattern was here #### +OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` +FROM `default`.`claims` +WHERE `typ_c` = 3 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: claims + filterExpr: (typ_c = 3) (type: boolean) + Statistics: Num rows: 1154941534 Data size: 1135307527922 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (typ_c = 3) (type: boolean) + Statistics: Num rows: 577470767 Data size: 567653763961 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 577470767 Data size: 567653763961 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: claims + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns claim_rec_id,claim_invoice_num,typ_c + columns.comments + columns.types bigint:string:int +#### A masked pattern was here #### + name default.claims + numFiles 0 + numRows 1154941534 + rawDataSize 1135307527922 + serialization.ddl struct claims { i64 claim_rec_id, string claim_invoice_num, i32 typ_c} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns claim_rec_id,claim_invoice_num,typ_c + columns.comments + columns.types bigint:string:int +#### A masked pattern was here #### + name default.claims + numFiles 0 + numRows 1154941534 + rawDataSize 1135307527922 + serialization.ddl struct claims { i64 claim_rec_id, string claim_invoice_num, i32 typ_c} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.claims + name: default.claims + Truncated Path -> Alias: + /claims [claims] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM claims WHERE typ_c=3 +PREHOOK: type: QUERY +PREHOOK: Input: default@claims +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM claims WHERE typ_c=3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@claims +#### A masked pattern was here #### +OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` +FROM `default`.`claims` +WHERE `typ_c` = 3 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: claims + filterExpr: (typ_c = 3) (type: boolean) + Statistics: Num rows: 1154941534 Data size: 1135307527922 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (typ_c = 3) (type: boolean) + Statistics: Num rows: 577470767 Data size: 567653763961 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 577470767 Data size: 567653763961 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: claims + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns claim_rec_id,claim_invoice_num,typ_c + columns.comments + columns.types bigint:string:int +#### A masked pattern was here #### + name default.claims + numFiles 0 + numRows 1154941534 + rawDataSize 1135307527922 + serialization.ddl struct claims { i64 claim_rec_id, string claim_invoice_num, i32 typ_c} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns claim_rec_id,claim_invoice_num,typ_c + columns.comments + columns.types bigint:string:int +#### A masked pattern was here #### + name default.claims + numFiles 0 + numRows 1154941534 + rawDataSize 1135307527922 + serialization.ddl struct claims { i64 claim_rec_id, string claim_invoice_num, i32 typ_c} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.claims + name: default.claims + Truncated Path -> Alias: + /claims [claims] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/llap/join_reordering_no_stats.q.out ql/src/test/results/clientpositive/llap/join_reordering_no_stats.q.out index fddffbb0a8..c28ce3d60d 100644 --- ql/src/test/results/clientpositive/llap/join_reordering_no_stats.q.out +++ ql/src/test/results/clientpositive/llap/join_reordering_no_stats.q.out @@ -223,6 +223,7 @@ STAGE PLANS: Processor Tree: ListSink +Warning: Shuffle Join MERGEJOIN[45][tables = [part_nostats, supplier_nostats]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select count(1) from part_nostats,supplier_nostats,lineitem_nostats where p_partkey = l_partkey and s_suppkey = l_suppkey PREHOOK: type: QUERY PREHOOK: Input: default@lineitem_nostats @@ -244,7 +245,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (XPROD_EDGE), Map 5 (XPROD_EDGE) Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### @@ -252,62 +253,47 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: lineitem_nostats - filterExpr: (l_partkey is not null and l_suppkey is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + alias: part_nostats + filterExpr: p_partkey is not null (type: boolean) + Statistics: Num rows: 1 Data size: -1 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (l_partkey is not null and l_suppkey is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: l_partkey (type: int), l_suppkey (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: PARTIAL Column stats: NONE + value expressions: p_partkey (type: int) Execution mode: vectorized, llap LLAP IO: no inputs Map 5 Map Operator Tree: TableScan - alias: part_nostats - filterExpr: p_partkey is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + alias: supplier_nostats + filterExpr: s_suppkey is not null (type: boolean) + Statistics: Num rows: 1 Data size: -1 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + predicate: s_suppkey is not null (type: boolean) + Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: PARTIAL Column stats: NONE + value expressions: s_suppkey (type: int) Execution mode: vectorized, llap LLAP IO: no inputs Map 6 Map Operator Tree: TableScan - alias: supplier_nostats - filterExpr: s_suppkey is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + alias: lineitem_nostats + filterExpr: (l_partkey is not null and l_suppkey is not null) (type: boolean) + Statistics: Num rows: 1 Data size: -1 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: s_suppkey is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s_suppkey (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + predicate: (l_partkey is not null and l_suppkey is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: l_partkey (type: int), l_suppkey (type: int) + sort order: ++ + Map-reduce partition columns: l_partkey (type: int), l_suppkey (type: int) + Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: PARTIAL Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 @@ -317,15 +303,15 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + 0 + 1 + outputColumnNames: _col0, _col12 + Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + key expressions: _col0 (type: int), _col12 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col12 (type: int) + Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: PARTIAL Column stats: NONE Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -333,19 +319,25 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + 0 _col0 (type: int), _col12 (type: int) + 1 l_partkey (type: int), l_suppkey (type: int) + outputColumnNames: _col0, _col12, _col23, _col24 + Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((_col12 = _col24) and (_col0 = _col23)) (type: boolean) + Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: PARTIAL Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: count(1) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: bigint) Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: @@ -353,10 +345,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -589,6 +581,7 @@ STAGE PLANS: Processor Tree: ListSink +Warning: Shuffle Join MERGEJOIN[45][tables = [employee_part_n1, supplier_nostats]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select count(1) from Employee_Part_n1,supplier_nostats,lineitem_nostats where employeeID= l_partkey and s_suppkey = l_suppkey PREHOOK: type: QUERY PREHOOK: Input: default@employee_part_n1 @@ -622,33 +615,12 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (XPROD_EDGE), Map 5 (XPROD_EDGE) Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: lineitem_nostats - filterExpr: (l_partkey is not null and l_suppkey is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (l_partkey is not null and l_suppkey is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: l_partkey (type: int), l_suppkey (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 Map Operator Tree: TableScan alias: employee_part_n1 @@ -657,35 +629,41 @@ STAGE PLANS: Filter Operator predicate: employeeid is not null (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: employeeid (type: int) - outputColumnNames: _col0 + Reduce Output Operator + sort order: Statistics: Num rows: 1 Data size: 4 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: PARTIAL Column stats: NONE + value expressions: employeeid (type: int) Execution mode: vectorized, llap LLAP IO: no inputs - Map 6 + Map 5 Map Operator Tree: TableScan alias: supplier_nostats filterExpr: s_suppkey is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: -1 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: s_suppkey is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s_suppkey (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: PARTIAL Column stats: NONE + value expressions: s_suppkey (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: lineitem_nostats + filterExpr: (l_partkey is not null and l_suppkey is not null) (type: boolean) + Statistics: Num rows: 1 Data size: -1 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (l_partkey is not null and l_suppkey is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: l_partkey (type: int), l_suppkey (type: int) + sort order: ++ + Map-reduce partition columns: l_partkey (type: int), l_suppkey (type: int) + Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: PARTIAL Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 @@ -695,15 +673,15 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + 0 + 1 + outputColumnNames: _col0, _col7 + Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + key expressions: _col0 (type: int), _col7 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col7 (type: int) + Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: PARTIAL Column stats: NONE Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -711,19 +689,25 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE - value expressions: _col0 (type: bigint) + 0 _col0 (type: int), _col7 (type: int) + 1 l_partkey (type: int), l_suppkey (type: int) + outputColumnNames: _col0, _col7, _col18, _col19 + Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((_col0 = _col18) and (_col7 = _col19)) (type: boolean) + Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: PARTIAL Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: count(1) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: bigint) Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: