diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 43dfceef61..6de4c445b8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -1968,8 +1968,19 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu // 10. Sort predicates in filter expressions if (conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_SORT_PREDS_WITH_STATS)) { perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null, - HepMatchOrder.BOTTOM_UP, HiveFilterSortPredicates.INSTANCE); + try { + calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null, + HepMatchOrder.BOTTOM_UP, HiveFilterSortPredicates.INSTANCE); + } catch (Exception e) { + boolean isMissingStats = noColsMissingStats.get() > 0; + if (isMissingStats) { + LOG.warn("Missing column stats (see previous messages), " + + "skipping sort predicates in filter expressions in CBO"); + noColsMissingStats.set(0); + } else { + throw e; + } + } perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Sort predicates within filter operators"); } diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index 1795ae5626..9a00a75b11 100644 --- ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -269,11 +269,14 @@ private static Statistics collectStatistics(HiveConf conf, PrunedPartitionList p if (needColStats) { colStats = getTableColumnStats(table, schema, neededColumns, colStatsCache, fetchColStats); - estimateStatsForMissingCols(neededColumns, colStats, table, conf, nr, schema); + if (estimateStats) { + estimateStatsForMissingCols(neededColumns, colStats, table, conf, nr, schema); + } // we should have stats for all columns (estimated or actual) - assert (neededColumns.size() == colStats.size()); - long betterDS = getDataSizeFromColumnStats(nr, colStats); - ds = (betterDS < 1 || colStats.isEmpty()) ? ds : betterDS; + if (neededColumns.size() == colStats.size()) { + long betterDS = getDataSizeFromColumnStats(nr, colStats); + ds = (betterDS < 1 || colStats.isEmpty()) ? ds : betterDS; + } } stats = new Statistics(nr, ds, numErasureCodedFiles); diff --git ql/src/test/queries/clientpositive/cbo_stats_estimation.q ql/src/test/queries/clientpositive/cbo_stats_estimation.q new file mode 100644 index 0000000000..80908f8a35 --- /dev/null +++ ql/src/test/queries/clientpositive/cbo_stats_estimation.q @@ -0,0 +1,11 @@ +CREATE TABLE claims(claim_rec_id bigint, claim_invoice_num string, typ_c int); +ALTER TABLE claims UPDATE STATISTICS set ('numRows'='1154941534','rawDataSize'='1135307527922'); + + +SET hive.stats.estimate=false; + +EXPLAIN EXTENDED SELECT count(1) FROM claims WHERE typ_c=3; + +SET hive.stats.ndv.estimate.percent=5e-7; + +EXPLAIN EXTENDED SELECT count(1) FROM claims WHERE typ_c=3; diff --git ql/src/test/results/clientpositive/cbo_stats_estimation.q.out ql/src/test/results/clientpositive/cbo_stats_estimation.q.out new file mode 100644 index 0000000000..389a9bc1cc --- /dev/null +++ ql/src/test/results/clientpositive/cbo_stats_estimation.q.out @@ -0,0 +1,278 @@ +PREHOOK: query: CREATE TABLE claims(claim_rec_id bigint, claim_invoice_num string, typ_c int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@claims +POSTHOOK: query: CREATE TABLE claims(claim_rec_id bigint, claim_invoice_num string, typ_c int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@claims +PREHOOK: query: ALTER TABLE claims UPDATE STATISTICS set ('numRows'='1154941534','rawDataSize'='1135307527922') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@claims +PREHOOK: Output: default@claims +POSTHOOK: query: ALTER TABLE claims UPDATE STATISTICS set ('numRows'='1154941534','rawDataSize'='1135307527922') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@claims +POSTHOOK: Output: default@claims +PREHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM claims WHERE typ_c=3 +PREHOOK: type: QUERY +PREHOOK: Input: default@claims +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM claims WHERE typ_c=3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@claims +#### A masked pattern was here #### +OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` +FROM `default`.`claims` +WHERE `typ_c` = 3 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: claims + filterExpr: (typ_c = 3) (type: boolean) + Statistics: Num rows: 1154941534 Data size: 1135307527922 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (typ_c = 3) (type: boolean) + Statistics: Num rows: 577470767 Data size: 567653763961 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 577470767 Data size: 567653763961 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: claims + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns claim_rec_id,claim_invoice_num,typ_c + columns.comments + columns.types bigint:string:int +#### A masked pattern was here #### + name default.claims + numFiles 0 + numRows 1154941534 + rawDataSize 1135307527922 + serialization.ddl struct claims { i64 claim_rec_id, string claim_invoice_num, i32 typ_c} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns claim_rec_id,claim_invoice_num,typ_c + columns.comments + columns.types bigint:string:int +#### A masked pattern was here #### + name default.claims + numFiles 0 + numRows 1154941534 + rawDataSize 1135307527922 + serialization.ddl struct claims { i64 claim_rec_id, string claim_invoice_num, i32 typ_c} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.claims + name: default.claims + Truncated Path -> Alias: + /claims [claims] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM claims WHERE typ_c=3 +PREHOOK: type: QUERY +PREHOOK: Input: default@claims +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN EXTENDED SELECT count(1) FROM claims WHERE typ_c=3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@claims +#### A masked pattern was here #### +OPTIMIZED SQL: SELECT COUNT(*) AS `$f0` +FROM `default`.`claims` +WHERE `typ_c` = 3 +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: claims + filterExpr: (typ_c = 3) (type: boolean) + Statistics: Num rows: 1154941534 Data size: 1135307527922 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (typ_c = 3) (type: boolean) + Statistics: Num rows: 577470767 Data size: 567653763961 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 577470767 Data size: 567653763961 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: bigint) + auto parallelism: false + Execution mode: vectorized + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: claims + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns claim_rec_id,claim_invoice_num,typ_c + columns.comments + columns.types bigint:string:int +#### A masked pattern was here #### + name default.claims + numFiles 0 + numRows 1154941534 + rawDataSize 1135307527922 + serialization.ddl struct claims { i64 claim_rec_id, string claim_invoice_num, i32 typ_c} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + bucketing_version 2 + column.name.delimiter , + columns claim_rec_id,claim_invoice_num,typ_c + columns.comments + columns.types bigint:string:int +#### A masked pattern was here #### + name default.claims + numFiles 0 + numRows 1154941534 + rawDataSize 1135307527922 + serialization.ddl struct claims { i64 claim_rec_id, string claim_invoice_num, i32 typ_c} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.claims + name: default.claims + Truncated Path -> Alias: + /claims [claims] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types bigint + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/llap/join_reordering_no_stats.q.out ql/src/test/results/clientpositive/llap/join_reordering_no_stats.q.out index fddffbb0a8..df15d59de0 100644 --- ql/src/test/results/clientpositive/llap/join_reordering_no_stats.q.out +++ ql/src/test/results/clientpositive/llap/join_reordering_no_stats.q.out @@ -223,6 +223,7 @@ STAGE PLANS: Processor Tree: ListSink +Warning: Shuffle Join MERGEJOIN[47][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select count(1) from part_nostats,supplier_nostats,lineitem_nostats where p_partkey = l_partkey and s_suppkey = l_suppkey PREHOOK: type: QUERY PREHOOK: Input: default@lineitem_nostats @@ -244,70 +245,67 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (XPROD_EDGE), Map 5 (XPROD_EDGE) Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: lineitem_nostats - filterExpr: (l_partkey is not null and l_suppkey is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (l_partkey is not null and l_suppkey is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: l_partkey (type: int), l_suppkey (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 Map Operator Tree: TableScan alias: part_nostats filterExpr: p_partkey is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: -1 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: p_partkey (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + sort order: + Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int) Execution mode: vectorized, llap LLAP IO: no inputs - Map 6 + Map 5 Map Operator Tree: TableScan alias: supplier_nostats filterExpr: s_suppkey is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: -1 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: s_suppkey is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: s_suppkey (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + sort order: + Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: lineitem_nostats + filterExpr: (l_partkey is not null and l_suppkey is not null) (type: boolean) + Statistics: Num rows: 1 Data size: -1 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (l_partkey is not null and l_suppkey is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: l_partkey (type: int), l_suppkey (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: PARTIAL Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 @@ -317,15 +315,15 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + 0 + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: PARTIAL Column stats: NONE Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -333,18 +331,18 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int), _col1 (type: int) + 1 _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: PARTIAL Column stats: NONE Group By Operator aggregations: count() minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: bigint) Reducer 4 Execution mode: vectorized, llap @@ -353,10 +351,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -589,6 +587,7 @@ STAGE PLANS: Processor Tree: ListSink +Warning: Shuffle Join MERGEJOIN[47][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select count(1) from Employee_Part_n1,supplier_nostats,lineitem_nostats where employeeID= l_partkey and s_suppkey = l_suppkey PREHOOK: type: QUERY PREHOOK: Input: default@employee_part_n1 @@ -622,33 +621,12 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (XPROD_EDGE), Map 5 (XPROD_EDGE) Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: lineitem_nostats - filterExpr: (l_partkey is not null and l_suppkey is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (l_partkey is not null and l_suppkey is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: l_partkey (type: int), l_suppkey (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) - Execution mode: vectorized, llap - LLAP IO: no inputs - Map 5 Map Operator Tree: TableScan alias: employee_part_n1 @@ -662,30 +640,48 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 4 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + sort order: Statistics: Num rows: 1 Data size: 4 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int) Execution mode: vectorized, llap LLAP IO: no inputs - Map 6 + Map 5 Map Operator Tree: TableScan alias: supplier_nostats filterExpr: s_suppkey is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: -1 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: s_suppkey is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: s_suppkey (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + sort order: + Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: lineitem_nostats + filterExpr: (l_partkey is not null and l_suppkey is not null) (type: boolean) + Statistics: Num rows: 1 Data size: -1 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: (l_partkey is not null and l_suppkey is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: l_partkey (type: int), l_suppkey (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: PARTIAL Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 @@ -695,15 +691,15 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + 0 + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: PARTIAL Column stats: NONE Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -711,9 +707,9 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + 0 _col0 (type: int), _col1 (type: int) + 1 _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 1 Data size: 9223372036854775807 Basic stats: PARTIAL Column stats: NONE Group By Operator aggregations: count() minReductionHashAggr: 0.99