diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java index e6f21e9..b6db6aa 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/DynamicPartitionPruningOptimization.java @@ -394,19 +394,18 @@ private boolean generateSemiJoinOperatorPlan(DynamicListContext ctx, ParseContex // we need the expr that generated the key of the reduce sink ExprNodeDesc key = ctx.generator.getConf().getKeyCols().get(ctx.desc.getKeyIndex()); - if (parentOfRS instanceof SelectOperator) { - // Make sure the semijoin branch is not on parition column. - String internalColName = null; - ExprNodeDesc exprNodeDesc = key; - // Find the ExprNodeColumnDesc - while (!(exprNodeDesc instanceof ExprNodeColumnDesc) && - (exprNodeDesc.getChildren() != null)) { - exprNodeDesc = exprNodeDesc.getChildren().get(0); - } - - if (exprNodeDesc instanceof ExprNodeColumnDesc) { - internalColName = ((ExprNodeColumnDesc) exprNodeDesc).getColumn(); + String internalColName = null; + ExprNodeDesc exprNodeDesc = key; + // Find the ExprNodeColumnDesc + while (!(exprNodeDesc instanceof ExprNodeColumnDesc) && + (exprNodeDesc.getChildren() != null)) { + exprNodeDesc = exprNodeDesc.getChildren().get(0); + } + if (exprNodeDesc instanceof ExprNodeColumnDesc) { + internalColName = ((ExprNodeColumnDesc) exprNodeDesc).getColumn(); + if (parentOfRS instanceof SelectOperator) { + // Make sure the semijoin branch is not on partition column. ExprNodeColumnDesc colExpr = ((ExprNodeColumnDesc) (parentOfRS. getColumnExprMap().get(internalColName))); String colName = ExprNodeDescUtils.extractColName(colExpr); @@ -423,12 +422,13 @@ private boolean generateSemiJoinOperatorPlan(DynamicListContext ctx, ParseContex // The column is partition column, skip the optimization. 
return false; } - } else { - // No column found! - // Bail out - return false; } + } else { + // No column found! + // Bail out + return false; } + List keyExprs = new ArrayList(); keyExprs.add(key); @@ -438,9 +438,32 @@ private boolean generateSemiJoinOperatorPlan(DynamicListContext ctx, ParseContex // project the relevant key column SelectDesc select = new SelectDesc(keyExprs, outputNames); + + // Create the new RowSchema for the projected column + ColumnInfo columnInfo = parentOfRS.getSchema().getColumnInfo(internalColName); + ArrayList signature = new ArrayList(); + signature.add(columnInfo); + RowSchema rowSchema = new RowSchema(signature); + + // Create the column expr map + Map colExprMap = new HashMap(); + ExprNodeDesc exprNode = null; + if ( parentOfRS.getColumnExprMap() != null) { + exprNode = parentOfRS.getColumnExprMap().get(internalColName).clone(); + } else { + exprNode = new ExprNodeColumnDesc(columnInfo); + } + + if (exprNode instanceof ExprNodeColumnDesc) { + ExprNodeColumnDesc encd = (ExprNodeColumnDesc) exprNode; + encd.setColumn(internalColName); + } + colExprMap.put(internalColName, exprNode); + + // Create the Select Operator SelectOperator selectOp = (SelectOperator) OperatorFactory.getAndMakeChild(select, - new RowSchema(parentOfRS.getSchema()), parentOfRS); + rowSchema, colExprMap, parentOfRS); // do a group by to aggregate min,max and bloom filter. 
float groupByMemoryUsage = diff --git a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction.q b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction.q index e686af6..5482cdb 100644 --- a/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction.q +++ b/ql/src/test/queries/clientpositive/dynamic_semijoin_reduction.q @@ -7,6 +7,7 @@ set hive.tez.dynamic.partition.pruning=true; set hive.tez.dynamic.semijoin.reduction=true; set hive.optimize.metadataonly=false; set hive.optimize.index.filter=true; +set hive.stats.autogather=true; -- Create Tables create table alltypesorc_int ( cint int, cstring string ) stored as ORC; @@ -27,6 +28,10 @@ insert overwrite table srcpart_date partition (ds = "2008-04-09") select key, va insert overwrite table srcpart_small partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09"; set hive.tez.dynamic.semijoin.reduction=false; +analyze table alltypesorc_int compute statistics for columns; +analyze table srcpart_date compute statistics for columns; +analyze table srcpart_small compute statistics for columns; + -- single column, single key EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1); select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1); @@ -52,8 +57,8 @@ set hive.tez.dynamic.semijoin.reduction=false; EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1 and srcpart_date.value = srcpart_small.value1); select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1 and srcpart_date.value = srcpart_small.value1); set hive.tez.dynamic.semijoin.reduction=true; -select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1 and srcpart_date.value = srcpart_small.value1); EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1 and 
srcpart_date.value = srcpart_small.value1); +select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1 and srcpart_date.value = srcpart_small.value1); set hive.tez.dynamic.semijoin.reduction=false; -- multiple sources, different keys @@ -75,6 +80,13 @@ set hive.tez.dynamic.semijoin.reduction=true; EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1); select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1); +-- With unions +explain select * from alltypesorc_int join + (select srcpart_date.key as key from srcpart_date + union all + select srcpart_small.key1 as key from srcpart_small) unionsrc on (alltypesorc_int.cstring = unionsrc.key); + + drop table srcpart_date; drop table srcpart_small; drop table alltypesorc_int; diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out index 012db41..d32cb5c 100644 --- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out +++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction.q.out @@ -102,6 +102,52 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Output: default@srcpart_small@ds=2008-04-09 POSTHOOK: Lineage: srcpart_small PARTITION(ds=2008-04-09).key1 SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: srcpart_small PARTITION(ds=2008-04-09).value1 SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: analyze table alltypesorc_int compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc_int +PREHOOK: Output: default@alltypesorc_int +#### A masked pattern was here #### +POSTHOOK: query: analyze table alltypesorc_int compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc_int +POSTHOOK: 
Output: default@alltypesorc_int +#### A masked pattern was here #### +PREHOOK: query: analyze table srcpart_date compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_date +PREHOOK: Input: default@srcpart_date@ds=2008-04-08 +PREHOOK: Input: default@srcpart_date@ds=2008-04-09 +PREHOOK: Output: default@srcpart_date +PREHOOK: Output: default@srcpart_date@ds=2008-04-08 +PREHOOK: Output: default@srcpart_date@ds=2008-04-09 +#### A masked pattern was here #### +POSTHOOK: query: analyze table srcpart_date compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_date +POSTHOOK: Input: default@srcpart_date@ds=2008-04-08 +POSTHOOK: Input: default@srcpart_date@ds=2008-04-09 +POSTHOOK: Output: default@srcpart_date +POSTHOOK: Output: default@srcpart_date@ds=2008-04-08 +POSTHOOK: Output: default@srcpart_date@ds=2008-04-09 +#### A masked pattern was here #### +PREHOOK: query: analyze table srcpart_small compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_small +PREHOOK: Input: default@srcpart_small@ds=2008-04-08 +PREHOOK: Input: default@srcpart_small@ds=2008-04-09 +PREHOOK: Output: default@srcpart_small +PREHOOK: Output: default@srcpart_small@ds=2008-04-08 +PREHOOK: Output: default@srcpart_small@ds=2008-04-09 +#### A masked pattern was here #### +POSTHOOK: query: analyze table srcpart_small compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_small +POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 +POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 +POSTHOOK: Output: default@srcpart_small +POSTHOOK: Output: default@srcpart_small@ds=2008-04-08 +POSTHOOK: Output: default@srcpart_small@ds=2008-04-09 +#### A masked pattern was here #### PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join 
srcpart_small on (srcpart_date.key = srcpart_small.key1) @@ -124,19 +170,19 @@ STAGE PLANS: TableScan alias: srcpart_date filterExpr: key is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Map 4 @@ -144,19 +190,19 @@ STAGE PLANS: TableScan alias: srcpart_small filterExpr: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key 
expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -168,15 +214,15 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - Statistics: Num rows: 2200 Data size: 404800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9756 Data size: 78048 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -185,10 +231,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -243,19 +289,19 @@ STAGE PLANS: TableScan alias: srcpart_date filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key_min) AND DynamicValue(RS_7_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key_bloom_filter)))) 
(type: boolean) - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key_min) AND DynamicValue(RS_7_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key_bloom_filter)))) (type: boolean) - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Map 4 @@ -263,31 +309,31 @@ STAGE PLANS: TableScan alias: srcpart_small filterExpr: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: 
PARTIAL Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=205) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: llap LLAP IO: all inputs @@ -300,15 +346,15 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - Statistics: Num rows: 2200 Data size: 404800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9756 Data size: 78048 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL value 
expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -317,10 +363,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -329,13 +375,13 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1000) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=205) mode: final outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Stage: Stage-0 @@ -385,33 +431,33 @@ STAGE PLANS: TableScan alias: srcpart_date filterExpr: key is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: 
COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE Dynamic Partitioning Event Operator Target column: ds (string) Target Input: srcpart_small Partition key expr: ds - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE Target Vertex: Map 4 Execution mode: llap LLAP IO: all inputs @@ -441,15 +487,15 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - Statistics: Num rows: 2200 Data size: 404800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9756 Data size: 78048 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: 
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -458,10 +504,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -512,21 +558,21 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: srcpart_date - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + alias: alltypesorc_int + filterExpr: cstring is not null (type: boolean) + Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + predicate: cstring is not null (type: boolean) + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string) + expressions: cstring (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2000 Data size: 
368000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Map 4 @@ -534,39 +580,39 @@ STAGE PLANS: TableScan alias: srcpart_small filterExpr: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Execution mode: llap LLAP IO: all inputs Map 5 Map Operator Tree: TableScan - alias: alltypesorc_int - filterExpr: cstring is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 926570 Basic stats: COMPLETE Column stats: NONE + alias: srcpart_date + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: cstring is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 926570 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: cstring 
(type: string) + expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 926570 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12288 Data size: 926570 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -580,15 +626,15 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) 2 _col0 (type: string) - Statistics: Num rows: 27033 Data size: 2038454 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16008 Data size: 128064 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -597,10 +643,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -646,112 +692,112 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 6 (BROADCAST_EDGE) - Map 8 <- Reducer 4 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE) + Map 1 <- Reducer 5 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) + Map 7 <- Reducer 6 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) - Reducer 7 <- Map 5 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: srcpart_date - filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_10_srcpart_small_key_min) AND DynamicValue(RS_10_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_small_key_bloom_filter)))) (type: boolean) - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + alias: alltypesorc_int + filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_10_srcpart_small_cstring_min) AND DynamicValue(RS_10_srcpart_small_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_small_cstring_bloom_filter))) and (cstring BETWEEN DynamicValue(RS_11_srcpart_date_cstring_min) AND DynamicValue(RS_11_srcpart_date_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_11_srcpart_date_cstring_bloom_filter)))) (type: boolean) + Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key is not null and (key BETWEEN DynamicValue(RS_10_srcpart_small_key_min) AND DynamicValue(RS_10_srcpart_small_key_max) and in_bloom_filter(key, 
DynamicValue(RS_10_srcpart_small_key_bloom_filter)))) (type: boolean) - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + predicate: (cstring is not null and (cstring BETWEEN DynamicValue(RS_10_srcpart_small_cstring_min) AND DynamicValue(RS_10_srcpart_small_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_small_cstring_bloom_filter))) and (cstring BETWEEN DynamicValue(RS_11_srcpart_date_cstring_min) AND DynamicValue(RS_11_srcpart_date_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_11_srcpart_date_cstring_bloom_filter)))) (type: boolean) + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string) + expressions: cstring (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=2000) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs - Map 5 + Map 4 Map Operator Tree: 
TableScan alias: srcpart_small filterExpr: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=205) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Select 
Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=205) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: llap LLAP IO: all inputs - Map 8 + Map 7 Map Operator Tree: TableScan - alias: alltypesorc_int - filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_9_srcpart_date_cstring_min) AND DynamicValue(RS_9_srcpart_date_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_9_srcpart_date_cstring_bloom_filter))) and (cstring BETWEEN DynamicValue(RS_10_srcpart_small_cstring_min) AND DynamicValue(RS_10_srcpart_small_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_small_cstring_bloom_filter)))) (type: boolean) - Statistics: Num rows: 12288 Data size: 926570 Basic stats: COMPLETE Column stats: NONE + alias: srcpart_date + filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_10_srcpart_small_key_min) AND DynamicValue(RS_10_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_small_key_bloom_filter)))) (type: boolean) + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (cstring is not null and (cstring BETWEEN 
DynamicValue(RS_9_srcpart_date_cstring_min) AND DynamicValue(RS_9_srcpart_date_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_9_srcpart_date_cstring_bloom_filter))) and (cstring BETWEEN DynamicValue(RS_10_srcpart_small_cstring_min) AND DynamicValue(RS_10_srcpart_small_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_small_cstring_bloom_filter)))) (type: boolean) - Statistics: Num rows: 12288 Data size: 926570 Basic stats: COMPLETE Column stats: NONE + predicate: (key is not null and (key BETWEEN DynamicValue(RS_10_srcpart_small_key_min) AND DynamicValue(RS_10_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_small_key_bloom_filter)))) (type: boolean) + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: cstring (type: string) + expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 926570 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12288 Data size: 926570 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=205) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 
(type: string), _col2 (type: binary) Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -765,15 +811,15 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) 2 _col0 (type: string) - Statistics: Num rows: 27033 Data size: 2038454 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16008 Data size: 128064 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -782,49 +828,49 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=2000) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=205) mode: final outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Statistics: Num 
rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1000) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=205) mode: final outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) - Reducer 7 + Reducer 8 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1000) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=205) mode: final outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Stage: Stage-0 @@ -876,19 +922,19 @@ STAGE PLANS: TableScan alias: srcpart_date 
filterExpr: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Map 4 @@ -896,19 +942,19 @@ STAGE PLANS: TableScan alias: srcpart_small filterExpr: (key1 is not null and value1 is not null) (type: boolean) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (key1 is not null and value1 is not null) (type: boolean) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key1 (type: string), value1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data 
size: 178000 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: PARTIAL Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -920,15 +966,15 @@ STAGE PLANS: keys: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 2200 Data size: 404800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 45 Data size: 360 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -937,10 +983,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -971,25 +1017,6 @@ POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 POSTHOOK: Input: 
default@srcpart_small@ds=2008-04-09 #### A masked pattern was here #### 8224 -PREHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1 and srcpart_date.value = srcpart_small.value1) -PREHOOK: type: QUERY -PREHOOK: Input: default@srcpart_date -PREHOOK: Input: default@srcpart_date@ds=2008-04-08 -PREHOOK: Input: default@srcpart_date@ds=2008-04-09 -PREHOOK: Input: default@srcpart_small -PREHOOK: Input: default@srcpart_small@ds=2008-04-08 -PREHOOK: Input: default@srcpart_small@ds=2008-04-09 -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1 and srcpart_date.value = srcpart_small.value1) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@srcpart_date -POSTHOOK: Input: default@srcpart_date@ds=2008-04-08 -POSTHOOK: Input: default@srcpart_date@ds=2008-04-09 -POSTHOOK: Input: default@srcpart_small -POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 -POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 -#### A masked pattern was here #### -8224 PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1 and srcpart_date.value = srcpart_small.value1) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1 and srcpart_date.value = srcpart_small.value1) @@ -1015,19 +1042,19 @@ STAGE PLANS: TableScan alias: srcpart_date filterExpr: (key is not null and value is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key_min) AND DynamicValue(RS_7_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key_bloom_filter))) and (value BETWEEN DynamicValue(RS_7_srcpart_small_value_min) AND DynamicValue(RS_7_srcpart_small_value_max) and in_bloom_filter(value, DynamicValue(RS_7_srcpart_small_value_bloom_filter)))) (type: boolean) - Statistics: Num rows: 2000 Data size: 
368000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and value is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key_min) AND DynamicValue(RS_7_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key_bloom_filter))) and (value BETWEEN DynamicValue(RS_7_srcpart_small_value_min) AND DynamicValue(RS_7_srcpart_small_value_max) and in_bloom_filter(value, DynamicValue(RS_7_srcpart_small_value_bloom_filter)))) (type: boolean) - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Map 4 @@ -1035,44 +1062,44 @@ STAGE PLANS: TableScan alias: srcpart_small filterExpr: (key1 is not null and value1 is not null) (type: boolean) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (key1 is not null and value1 is not null) (type: boolean) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 178000 Basic 
stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key1 (type: string), value1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=205) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 91000 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000) + aggregations: min(_col0), 
max(_col0), bloom_filter(_col0, expectedEntries=214) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: llap LLAP IO: all inputs @@ -1085,15 +1112,15 @@ STAGE PLANS: keys: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 2200 Data size: 404800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 45 Data size: 360 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -1102,10 +1129,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1114,25 +1141,25 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1000) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=205) mode: final outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1000) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=214) mode: final outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Stage: Stage-0 @@ -1141,6 +1168,25 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1 and srcpart_date.value = srcpart_small.value1) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_date +PREHOOK: Input: default@srcpart_date@ds=2008-04-08 +PREHOOK: Input: 
default@srcpart_date@ds=2008-04-09 +PREHOOK: Input: default@srcpart_small +PREHOOK: Input: default@srcpart_small@ds=2008-04-08 +PREHOOK: Input: default@srcpart_small@ds=2008-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1 and srcpart_date.value = srcpart_small.value1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_date +POSTHOOK: Input: default@srcpart_date@ds=2008-04-08 +POSTHOOK: Input: default@srcpart_date@ds=2008-04-09 +POSTHOOK: Input: default@srcpart_small +POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 +POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 +#### A masked pattern was here #### +8224 PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) join alltypesorc_int on (srcpart_date.value = alltypesorc_int.cstring) @@ -1162,62 +1208,62 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: srcpart_date - filterExpr: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + alias: alltypesorc_int + filterExpr: cstring is not null (type: boolean) + Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + predicate: cstring is not null (type: boolean) + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num 
rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + expressions: cstring (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Map 5 Map Operator Tree: TableScan - alias: srcpart_small - filterExpr: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + alias: srcpart_date + filterExpr: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 2000 Data 
size: 356000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) Execution mode: llap LLAP IO: all inputs Map 6 Map Operator Tree: TableScan - alias: alltypesorc_int - filterExpr: cstring is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 926570 Basic stats: COMPLETE Column stats: NONE + alias: srcpart_small + filterExpr: key1 is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator - predicate: cstring is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 926570 Basic stats: COMPLETE Column stats: NONE + predicate: key1 is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: cstring (type: string) + expressions: key1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 926570 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12288 Data size: 926570 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -1228,14 +1274,14 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col0 (type: string) + 1 _col1 (type: string) outputColumnNames: _col1 - Statistics: Num rows: 2200 Data size: 404800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3281 Data size: 285447 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 2200 Data size: 404800 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 3281 Data size: 285447 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -1245,15 +1291,15 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 _col0 (type: string) - Statistics: Num rows: 13516 Data size: 1019227 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16004 Data size: 128032 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col0 (type: bigint) Reducer 4 Execution mode: llap @@ -1262,10 +1308,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1311,87 +1357,100 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 7 (BROADCAST_EDGE) - Map 8 <- Reducer 5 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) - Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Map 1 <- Reducer 6 (BROADCAST_EDGE) + Map 5 <- Reducer 8 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 
(SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) + Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: srcpart_date - filterExpr: (key is not null and value is not null and (key BETWEEN DynamicValue(RS_10_srcpart_small_key_min) AND DynamicValue(RS_10_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_small_key_bloom_filter)))) (type: boolean) - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + alias: alltypesorc_int + filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_10_srcpart_date_cstring_min) AND DynamicValue(RS_10_srcpart_date_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_date_cstring_bloom_filter)))) (type: boolean) + Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key is not null and value is not null and (key BETWEEN DynamicValue(RS_10_srcpart_small_key_min) AND DynamicValue(RS_10_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_10_srcpart_small_key_bloom_filter)))) (type: boolean) - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + predicate: (cstring is not null and (cstring BETWEEN DynamicValue(RS_10_srcpart_date_cstring_min) AND DynamicValue(RS_10_srcpart_date_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_10_srcpart_date_cstring_bloom_filter)))) (type: boolean) + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + expressions: cstring (type: string) + outputColumnNames: 
_col0 + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs - Map 6 + Map 5 Map Operator Tree: TableScan - alias: srcpart_small - filterExpr: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + alias: srcpart_date + filterExpr: (key is not null and value is not null and (key BETWEEN DynamicValue(RS_13_srcpart_small_key_min) AND DynamicValue(RS_13_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_13_srcpart_small_key_bloom_filter)))) (type: boolean) + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + predicate: (key is not null and value is not null and (key BETWEEN DynamicValue(RS_13_srcpart_small_key_min) AND DynamicValue(RS_13_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_13_srcpart_small_key_bloom_filter)))) (type: boolean) + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col1 (type: 
string) sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string) Select Operator - expressions: _col0 (type: string) + expressions: _col1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=214) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: llap LLAP IO: all inputs - Map 8 + Map 7 Map Operator Tree: TableScan - alias: alltypesorc_int - filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_12_srcpart_date_cstring_min) AND DynamicValue(RS_12_srcpart_date_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_12_srcpart_date_cstring_bloom_filter)))) (type: boolean) - Statistics: Num rows: 12288 Data size: 926570 Basic stats: COMPLETE Column stats: NONE + alias: srcpart_small + filterExpr: key1 is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator - predicate: (cstring is not null and 
(cstring BETWEEN DynamicValue(RS_12_srcpart_date_cstring_min) AND DynamicValue(RS_12_srcpart_date_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_12_srcpart_date_cstring_bloom_filter)))) (type: boolean) - Statistics: Num rows: 12288 Data size: 926570 Basic stats: COMPLETE Column stats: NONE + predicate: key1 is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Select Operator - expressions: cstring (type: string) + expressions: key1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 12288 Data size: 926570 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 12288 Data size: 926570 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=205) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -1402,27 +1461,14 @@ STAGE PLANS: Inner Join 0 to 1 keys: 0 _col0 (type: string) - 1 _col0 (type: string) + 1 _col1 (type: string) outputColumnNames: _col1 - Statistics: Num rows: 2200 Data size: 404800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3281 Data size: 
285447 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 2200 Data size: 404800 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 2200 Data size: 404800 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=2200) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Statistics: Num rows: 3281 Data size: 285447 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -1432,15 +1478,15 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 _col0 (type: string) - Statistics: Num rows: 13516 Data size: 1019227 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16004 Data size: 128032 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col0 (type: bigint) Reducer 4 Execution mode: llap @@ -1449,37 +1495,37 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 5 + Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=2200) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=214) mode: final outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) - Reducer 7 + Reducer 8 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1000) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=205) mode: final outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL value expressions: 
_col0 (type: string), _col1 (type: string), _col2 (type: binary) Stage: Stage-0 @@ -1532,14 +1578,14 @@ STAGE PLANS: TableScan alias: srcpart_date filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key_min) AND DynamicValue(RS_7_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key_bloom_filter)))) (type: boolean) - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key_min) AND DynamicValue(RS_7_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key_bloom_filter)))) (type: boolean) - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1548,15 +1594,15 @@ STAGE PLANS: 1 _col0 (type: string) input vertices: 1 Map 3 - Statistics: Num rows: 2200 Data size: 404800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9756 Data size: 78048 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL value expressions: 
_col0 (type: bigint) Execution mode: llap LLAP IO: all inputs @@ -1565,31 +1611,31 @@ STAGE PLANS: TableScan alias: srcpart_small filterExpr: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=205) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL 
value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: llap LLAP IO: all inputs @@ -1600,10 +1646,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1612,13 +1658,13 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1000) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=205) mode: final outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Stage: Stage-0 @@ -1669,14 +1715,14 @@ STAGE PLANS: TableScan alias: srcpart_date filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key_min) AND DynamicValue(RS_7_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key_bloom_filter)))) (type: boolean) - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 174000 Basic 
stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key_min) AND DynamicValue(RS_7_srcpart_small_key_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key_bloom_filter)))) (type: boolean) - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1685,15 +1731,15 @@ STAGE PLANS: 1 _col0 (type: string) input vertices: 1 Map 3 - Statistics: Num rows: 2200 Data size: 404800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9756 Data size: 78048 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: all inputs @@ -1702,31 +1748,31 @@ STAGE PLANS: TableScan alias: srcpart_small filterExpr: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: key1 is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: 
Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=205) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: llap LLAP IO: all inputs @@ -1737,10 +1783,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1749,13 +1795,13 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1000) + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=205) mode: final outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Stage: Stage-0 @@ -1783,6 +1829,150 @@ POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 #### A masked pattern was here #### 8224 +PREHOOK: query: explain select * from alltypesorc_int join + (select srcpart_date.key as key from srcpart_date + union all + select srcpart_small.key1 as key from srcpart_small) unionsrc on (alltypesorc_int.cstring = unionsrc.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from alltypesorc_int join + (select srcpart_date.key as key from srcpart_date + union all + select srcpart_small.key1 as key from srcpart_small) unionsrc on (alltypesorc_int.cstring = unionsrc.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Reducer 4 (BROADCAST_EDGE), Union 3 (BROADCAST_EDGE) + Map 2 <- Union 3 (CONTAINS) + 
Map 5 <- Union 3 (CONTAINS) + Reducer 4 <- Union 3 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc_int + filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_12_srcpart_date_cstring_min) AND DynamicValue(RS_12_srcpart_date_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_12_srcpart_date_cstring_bloom_filter)))) (type: boolean) + Statistics: Num rows: 12288 Data size: 899146 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (cstring is not null and (cstring BETWEEN DynamicValue(RS_12_srcpart_date_cstring_min) AND DynamicValue(RS_12_srcpart_date_cstring_max) and in_bloom_filter(cstring, DynamicValue(RS_12_srcpart_date_cstring_bloom_filter)))) (type: boolean) + Statistics: Num rows: 9174 Data size: 671296 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cint (type: int), cstring (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 9174 Data size: 671296 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Union 3 + Statistics: Num rows: 4922 Data size: 910570 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 4922 Data size: 910570 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + LLAP IO: all inputs + Map 2 + Map Operator Tree: + TableScan + alias: srcpart_date + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is 
not null (type: boolean) + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3000 Data size: 261000 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 3000 Data size: 261000 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=205) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Execution mode: llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: srcpart_small + filterExpr: key1 is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Filter Operator + predicate: key1 is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: key1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3000 Data size: 261000 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: _col0 (type: string) + 
outputColumnNames: _col0 + Statistics: Num rows: 3000 Data size: 261000 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=205) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Execution mode: llap + LLAP IO: all inputs + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=205) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Union 3 + Vertex: Union 3 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: drop table srcpart_date PREHOOK: type: DROPTABLE PREHOOK: Input: default@srcpart_date diff --git a/ql/src/test/results/clientpositive/llap/mergejoin.q.out b/ql/src/test/results/clientpositive/llap/mergejoin.q.out index 2dcfd6b..ae99e66 100644 --- a/ql/src/test/results/clientpositive/llap/mergejoin.q.out +++ b/ql/src/test/results/clientpositive/llap/mergejoin.q.out @@ -61,7 +61,7 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(_col0), max(_col0), bloom_filter(_col0, 
expectedEntries=14) mode: hash