diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java index caec2c08e9..a1401aac72 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java @@ -190,8 +190,8 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Obje } } else { LOG.debug("Column " + column + " is not a partition column"); - semiJoin = semiJoin && !disableSemiJoinOptDueToExternalTable(parseContext.getConf(), ts, ctx); - if (semiJoin && ts.getConf().getFilterExpr() != null) { + if (semiJoin && !disableSemiJoinOptDueToExternalTable(parseContext.getConf(), ts, ctx) + && ts.getConf().getFilterExpr() != null) { LOG.debug("Initiate semijoin reduction for " + column + " (" + ts.getConf().getFilterExpr().getExprString()); diff --git a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_4.q b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_4.q index a04ab666e0..9b1723afea 100644 --- a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_4.q +++ b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction_4.q @@ -18,6 +18,7 @@ set hive.disable.unsafe.external.table.operations=true; -- Create Tables create table srcpart_date_n1 (key string, value string) partitioned by (ds string ) stored as ORC; CREATE TABLE srcpart_small_n0(key1 STRING, value1 STRING) partitioned by (ds string) STORED as ORC; +CREATE TABLE srcpart_medium_n0(key2 STRING, value2 STRING) partitioned by (ds string) STORED as ORC; create external table srcpart_date_ext (key string, value string) partitioned by (ds string ) stored as ORC; CREATE external TABLE srcpart_small_ext(key1 STRING, value1 STRING) partitioned by (ds string) STORED as ORC; @@ -28,6 +29,8 @@ alter table srcpart_date_n1 add partition (ds = "2008-04-09"); alter table srcpart_small_n0 add partition (ds = "2008-04-08"); alter table srcpart_small_n0 add partition (ds = "2008-04-09"); +alter table srcpart_medium_n0 add partition (ds = "2008-04-08"); + alter table srcpart_date_ext add partition (ds = "2008-04-08"); alter table srcpart_date_ext add partition (ds = "2008-04-09"); @@ -38,6 +41,7 @@ alter table srcpart_small_ext add partition (ds = "2008-04-09"); insert overwrite table srcpart_date_n1 partition (ds = "2008-04-08" ) select key, value from srcpart where ds = "2008-04-08"; insert overwrite table srcpart_date_n1 partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09"; insert overwrite table srcpart_small_n0 partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09" limit 20; +insert overwrite table srcpart_medium_n0 partition (ds = "2008-04-08") select key, value from srcpart where ds = "2008-04-09" limit 50; insert overwrite table srcpart_date_ext partition (ds = "2008-04-08" ) select key, value from srcpart where ds = "2008-04-08"; insert overwrite table srcpart_date_ext partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09"; @@ -45,6 +49,7 @@ insert overwrite table srcpart_small_ext partition (ds = "2008-04-09") select ke analyze table srcpart_date_n1 compute statistics for columns; analyze table srcpart_small_n0 compute statistics for columns; +analyze table srcpart_medium_n0 compute statistics for columns; analyze table srcpart_date_ext compute statistics for columns; analyze table srcpart_small_ext compute statistics for columns; @@ -54,6 +59,8 @@ analyze table srcpart_small_ext compute statistics for columns; set test.comment=This query should use semijoin reduction optimization; set test.comment; EXPLAIN select count(*) from srcpart_date_n1 join srcpart_small_n0 on (srcpart_date_n1.key = srcpart_small_n0.key1); +-- multiple sources, single key +EXPLAIN select count(*) from srcpart_date_n1 join srcpart_small_n0 on (srcpart_date_n1.key = srcpart_small_n0.key1) join srcpart_medium_n0 on (srcpart_medium_n0.key2 = srcpart_date_n1.key); set test.comment=Big table is external table - no semijoin reduction opt; set test.comment; @@ -63,3 +70,7 @@ set test.comment=Small table is external table - no semijoin reduction opt; set test.comment; EXPLAIN select count(*) from srcpart_date_n1 join srcpart_small_ext on (srcpart_date_n1.key = srcpart_small_ext.key1); +set test.comment=Small table is external table - no semijoin reduction opt for ext table but semijoin reduction opt for regular table; +set test.comment; +EXPLAIN select count(*) from srcpart_date_n1 join srcpart_small_ext on (srcpart_date_n1.key = srcpart_small_ext.key1) join srcpart_medium_n0 on (srcpart_medium_n0.key2 = srcpart_date_n1.key); + diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_4.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_4.q.out index 0feb362023..4c04eb1a08 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_4.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_4.q.out @@ -14,6 +14,14 @@ POSTHOOK: query: CREATE TABLE srcpart_small_n0(key1 STRING, value1 STRING) parti POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@srcpart_small_n0 +PREHOOK: query: CREATE TABLE srcpart_medium_n0(key2 STRING, value2 STRING) partitioned by (ds string) STORED as ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcpart_medium_n0 +POSTHOOK: query: CREATE TABLE srcpart_medium_n0(key2 STRING, value2 STRING) partitioned by (ds string) STORED as ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcpart_medium_n0 PREHOOK: query: create external table srcpart_date_ext (key string, value string) partitioned by (ds string ) stored as ORC PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -58,6 +66,13 @@ POSTHOOK: query: alter table srcpart_small_n0 add partition (ds = "2008-04-09") POSTHOOK: type: ALTERTABLE_ADDPARTS POSTHOOK: Output: default@srcpart_small_n0 POSTHOOK: Output: default@srcpart_small_n0@ds=2008-04-09 +PREHOOK: query: alter table srcpart_medium_n0 add partition (ds = "2008-04-08") +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@srcpart_medium_n0 +POSTHOOK: query: alter table srcpart_medium_n0 add partition (ds = "2008-04-08") +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@srcpart_medium_n0 +POSTHOOK: Output: default@srcpart_medium_n0@ds=2008-04-08 PREHOOK: query: alter table srcpart_date_ext add partition (ds = "2008-04-08") PREHOOK: type: ALTERTABLE_ADDPARTS PREHOOK: Output: default@srcpart_date_ext @@ -128,6 +143,20 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Output: default@srcpart_small_n0@ds=2008-04-09 POSTHOOK: Lineage: srcpart_small_n0 PARTITION(ds=2008-04-09).key1 SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: srcpart_small_n0 PARTITION(ds=2008-04-09).value1 SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table srcpart_medium_n0 partition (ds = "2008-04-08") select key, value from srcpart where ds = "2008-04-09" limit 50 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_medium_n0@ds=2008-04-08 +POSTHOOK: query: insert overwrite table srcpart_medium_n0 partition (ds = "2008-04-08") select key, value from srcpart where ds = "2008-04-09" limit 50 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_medium_n0@ds=2008-04-08 +POSTHOOK: Lineage: srcpart_medium_n0 PARTITION(ds=2008-04-08).key2 SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_medium_n0 PARTITION(ds=2008-04-08).value2 SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: insert overwrite table srcpart_date_ext partition (ds = "2008-04-08" ) select key, value from srcpart where ds = "2008-04-08" PREHOOK: type: QUERY PREHOOK: Input: default@srcpart @@ -206,6 +235,20 @@ POSTHOOK: Output: default@srcpart_small_n0 POSTHOOK: Output: default@srcpart_small_n0@ds=2008-04-08 POSTHOOK: Output: default@srcpart_small_n0@ds=2008-04-09 #### A masked pattern was here #### +PREHOOK: query: analyze table srcpart_medium_n0 compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@srcpart_medium_n0 +PREHOOK: Input: default@srcpart_medium_n0@ds=2008-04-08 +PREHOOK: Output: default@srcpart_medium_n0 +PREHOOK: Output: default@srcpart_medium_n0@ds=2008-04-08 +#### A masked pattern was here #### +POSTHOOK: query: analyze table srcpart_medium_n0 compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@srcpart_medium_n0 +POSTHOOK: Input: default@srcpart_medium_n0@ds=2008-04-08 +POSTHOOK: Output: default@srcpart_medium_n0 +POSTHOOK: Output: default@srcpart_medium_n0@ds=2008-04-08 +#### A masked pattern was here #### PREHOOK: query: analyze table srcpart_date_ext compute statistics for columns PREHOOK: type: ANALYZE_TABLE PREHOOK: Input: default@srcpart_date_ext @@ -368,6 +411,179 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: EXPLAIN select count(*) from srcpart_date_n1 join srcpart_small_n0 on (srcpart_date_n1.key = srcpart_small_n0.key1) join srcpart_medium_n0 on (srcpart_medium_n0.key2 = srcpart_date_n1.key) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select count(*) from srcpart_date_n1 join srcpart_small_n0 on (srcpart_date_n1.key = srcpart_small_n0.key1) join srcpart_medium_n0 on (srcpart_medium_n0.key2 = srcpart_date_n1.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Reducer 5 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart_date_n1 + filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_10_srcpart_medium_n0_key2_min) AND DynamicValue(RS_10_srcpart_medium_n0_key2_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_medium_n0_key2_bloom_filter))) and (key BETWEEN DynamicValue(RS_11_srcpart_small_n0_key1_min) AND DynamicValue(RS_11_srcpart_small_n0_key1_max) and in_bloom_filter(key, DynamicValue(RS_11_srcpart_small_n0_key1_bloom_filter)))) (type: boolean) + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((key BETWEEN DynamicValue(RS_10_srcpart_medium_n0_key2_min) AND DynamicValue(RS_10_srcpart_medium_n0_key2_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_medium_n0_key2_bloom_filter))) and (key BETWEEN DynamicValue(RS_11_srcpart_small_n0_key1_min) AND DynamicValue(RS_11_srcpart_small_n0_key1_max) and in_bloom_filter(key, DynamicValue(RS_11_srcpart_small_n0_key1_bloom_filter))) and key is not null) (type: boolean) + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: srcpart_medium_n0 + filterExpr: key2 is not null (type: boolean) + Statistics: Num rows: 50 Data size: 4350 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key2 is not null (type: boolean) + Statistics: Num rows: 50 Data size: 4350 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key2 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 50 Data size: 4350 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 50 Data size: 4350 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 50 Data size: 4350 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=49) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 6 + Map Operator Tree: + TableScan + alias: srcpart_small_n0 + filterExpr: key1 is not null (type: boolean) + Statistics: Num rows: 20 Data size: 1740 Basic stats: PARTIAL Column stats: PARTIAL + Filter Operator + predicate: key1 is not null (type: boolean) + Statistics: Num rows: 20 Data size: 1740 Basic stats: PARTIAL Column stats: PARTIAL + Select Operator + expressions: key1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 1740 Basic stats: PARTIAL Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 20 Data size: 1740 Basic stats: PARTIAL Column stats: PARTIAL + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 1740 Basic stats: PARTIAL Column stats: PARTIAL + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=20) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 639 Basic stats: PARTIAL Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 639 Basic stats: PARTIAL Column stats: PARTIAL + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + Statistics: Num rows: 4400 Data size: 382800 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=49) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Reducer 7 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=20) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 639 Basic stats: PARTIAL Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 639 Basic stats: PARTIAL Column stats: PARTIAL + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + test.comment=Big table is external table - no semijoin reduction opt PREHOOK: query: EXPLAIN select count(*) from srcpart_date_ext join srcpart_small_n0 on (srcpart_date_ext.key = srcpart_small_n0.key1) PREHOOK: type: QUERY @@ -566,3 +782,151 @@ STAGE PLANS: Processor Tree: ListSink +test.comment=Small table is external table - no semijoin reduction opt for ext table but semijoin reduction opt for regular table +PREHOOK: query: EXPLAIN select count(*) from srcpart_date_n1 join srcpart_small_ext on (srcpart_date_n1.key = srcpart_small_ext.key1) join srcpart_medium_n0 on (srcpart_medium_n0.key2 = srcpart_date_n1.key) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select count(*) from srcpart_date_n1 join srcpart_small_ext on (srcpart_date_n1.key = srcpart_small_ext.key1) join srcpart_medium_n0 on (srcpart_medium_n0.key2 = srcpart_date_n1.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Reducer 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart_date_n1 + filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_10_srcpart_medium_n0_key2_min) AND DynamicValue(RS_10_srcpart_medium_n0_key2_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_medium_n0_key2_bloom_filter)))) (type: boolean) + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((key BETWEEN DynamicValue(RS_10_srcpart_medium_n0_key2_min) AND DynamicValue(RS_10_srcpart_medium_n0_key2_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_medium_n0_key2_bloom_filter))) and key is not null) (type: boolean) + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: srcpart_medium_n0 + filterExpr: key2 is not null (type: boolean) + Statistics: Num rows: 50 Data size: 4350 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key2 is not null (type: boolean) + Statistics: Num rows: 50 Data size: 4350 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key2 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 50 Data size: 4350 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 50 Data size: 4350 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 50 Data size: 4350 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=49) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 6 + Map Operator Tree: + TableScan + alias: srcpart_small_ext + filterExpr: key1 is not null (type: boolean) + Statistics: Num rows: 20 Data size: 1740 Basic stats: PARTIAL Column stats: PARTIAL + Filter Operator + predicate: key1 is not null (type: boolean) + Statistics: Num rows: 20 Data size: 1740 Basic stats: PARTIAL Column stats: PARTIAL + Select Operator + expressions: key1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 1740 Basic stats: PARTIAL Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 20 Data size: 1740 Basic stats: PARTIAL Column stats: PARTIAL + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + Statistics: Num rows: 4400 Data size: 382800 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=49) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink +