diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedScanOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedScanOptimizer.java
index 3349fc0..e31119f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedScanOptimizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedScanOptimizer.java
@@ -44,6 +44,7 @@
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.exec.UnionOperator;
 import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.parse.SemiJoinBranchInfo;
 import org.apache.hadoop.hive.ql.plan.DynamicPruningEventDesc;
@@ -141,6 +142,15 @@ public ParseContext transform(ParseContext pctx) throws SemanticException {
           // Skip
           continue;
         }
+        // If partitions do not match, we currently do not merge
+        PrunedPartitionList prevTsOpPPList = pctx.getPrunedPartitions(prevTsOp);
+        PrunedPartitionList tsOpPPList = pctx.getPrunedPartitions(tsOp);
+        if (prevTsOpPPList.hasUnknownPartitions()
+            || tsOpPPList.hasUnknownPartitions()
+            || !prevTsOpPPList.getPartitions().equals(tsOpPPList.getPartitions())) {
+          // Skip
+          continue;
+        }
 
         // It seems these two operators can be merged.
         // Check that plan meets some preconditions before doing it.
@@ -226,27 +236,18 @@ public ParseContext transform(ParseContext pctx) throws SemanticException {
   }
 
   private static Set<TableScanOperator> gatherNotValidTableScanOps(
-      ParseContext pctx, SharedScanOptimizerCache optimizerCache) {
+      ParseContext pctx, SharedScanOptimizerCache optimizerCache) throws SemanticException {
     // Find TS operators with partition pruning enabled in plan
     // because these TS may potentially read different data for
     // different pipeline.
     // These can be:
-    // 1) TS with static partitioning.
-    //    TODO: Check partition list of different TS and do not add if they are identical
-    // 2) TS with DPP.
+    // 1) TS with DPP.
     //    TODO: Check if dynamic filters are identical and do not add.
-    // 3) TS with semijoin DPP.
+    // 2) TS with semijoin DPP.
     //    TODO: Check for dynamic filters.
     Set<TableScanOperator> notValidTableScanOps = new HashSet<>();
-    // 1) TS with static partitioning.
+    // 1) TS with DPP.
     Map<String, TableScanOperator> topOps = pctx.getTopOps();
-    for (TableScanOperator tsOp : topOps.values()) {
-      if (tsOp.getConf().getPartColumns() != null &&
-          !tsOp.getConf().getPartColumns().isEmpty()) {
-        notValidTableScanOps.add(tsOp);
-      }
-    }
-    // 2) TS with DPP.
     Collection<Operator<? extends OperatorDesc>> tableScanOps =
         Lists.<Operator<?>>newArrayList(topOps.values());
     Set<AppMasterEventOperator> s =
@@ -258,7 +259,7 @@ public ParseContext transform(ParseContext pctx) throws SemanticException {
         optimizerCache.tableScanToDPPSource.put(dped.getTableScan(), a);
       }
     }
-    // 3) TS with semijoin DPP.
+    // 2) TS with semijoin DPP.
     for (Entry<ReduceSinkOperator, SemiJoinBranchInfo> e : pctx.getRsToSemiJoinBranchInfo().entrySet()) {
       notValidTableScanOps.add(e.getValue().getTsOp());
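The heart of the Java change above is the new precondition in transform(): two TableScan operators over the same table are merged only when partition pruning has produced a fully known and identical partition list for both sides, i.e. PrunedPartitionList.hasUnknownPartitions() is false on both and the getPartitions() sets are equal. A minimal, self-contained Java sketch of that rule (illustrative names only, not the optimizer's real classes; a pruning result is reduced here to a set of partition names plus an "unknown" flag):

    import java.util.Set;

    // Illustrative stand-in for a pruning result: the set of selected partition
    // names plus a flag saying whether pruning could not be resolved statically.
    class PrunedPartitionsModel {
      final Set<String> partitions;
      final boolean hasUnknownPartitions;

      PrunedPartitionsModel(Set<String> partitions, boolean hasUnknownPartitions) {
        this.partitions = partitions;
        this.hasUnknownPartitions = hasUnknownPartitions;
      }
    }

    public class SharedScanMergeCheck {

      // Mirrors the added precondition: merge only if both pruning results are
      // fully known and select exactly the same partitions.
      static boolean canMergeScans(PrunedPartitionsModel prev, PrunedPartitionsModel curr) {
        if (prev.hasUnknownPartitions || curr.hasUnknownPartitions) {
          return false;
        }
        return prev.partitions.equals(curr.partitions);
      }

      public static void main(String[] args) {
        PrunedPartitionsModel fooScan = new PrunedPartitionsModel(Set.of("s=foo"), false);
        PrunedPartitionsModel barScan = new PrunedPartitionsModel(Set.of("s=bar"), false);
        PrunedPartitionsModel fooScanAgain = new PrunedPartitionsModel(Set.of("s=foo"), false);

        System.out.println(canMergeScans(fooScan, barScan));      // false: different partitions
        System.out.println(canMergeScans(fooScan, fooScanAgain)); // true: identical partitions
      }
    }

This is exactly what the new partition_shared_scan.q test below exercises: with f1.s='foo' and f2.s='bar' the two scans of foo read different partitions and stay separate, while with f1.s='foo' and f2.s='foo' they read the same partition and can be shared.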
diff --git ql/src/test/queries/clientpositive/partition_shared_scan.q ql/src/test/queries/clientpositive/partition_shared_scan.q
new file mode 100644
index 0000000..7ddf294
--- /dev/null
+++ ql/src/test/queries/clientpositive/partition_shared_scan.q
@@ -0,0 +1,23 @@
+set hive.merge.nway.joins=false;
+
+drop table foo;
+
+create table foo (i int) partitioned by (s string);
+insert overwrite table foo partition(s='foo') select cint from alltypesorc limit 10;
+insert overwrite table foo partition(s='bar') select cint from alltypesorc limit 10;
+
+explain
+select *
+from foo f1
+join part p1 on (p1.p_partkey = f1.i)
+join foo f2 on (f1.i = f2.i)
+where f1.s='foo' and f2.s='bar';
+
+explain
+select *
+from foo f1
+join part p1 on (p1.p_partkey = f1.i)
+join foo f2 on (f1.i = f2.i)
+where f1.s='foo' and f2.s='foo';
+
+drop table foo;
diff --git ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out
index 9c66115..2875e13 100644
--- ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out
+++ ql/src/test/results/clientpositive/llap/dynamic_partition_pruning.q.out
@@ -5662,10 +5662,10 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS)
-        Reducer 4 <- Map 1 (BROADCAST_EDGE), Union 3 (SIMPLE_EDGE)
-        Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
-        Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS)
+        Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE), Union 4 (CONTAINS)
+        Reducer 5 <- Map 1 (BROADCAST_EDGE), Union 4 (SIMPLE_EDGE)
+        Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
+        Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE), Union 4 (CONTAINS)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -5674,18 +5674,22 @@ STAGE PLANS:
                   alias: srcpart
                   filterExpr: ds is not null (type: boolean)
                   Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE
-                  Filter Operator
-                    predicate: ds is not null (type: boolean)
+                  Select Operator
+                    expressions: ds (type: string)
+                    outputColumnNames: _col0
                     Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE
-                    Select Operator
-                      expressions: ds (type: string)
-                      outputColumnNames: _col0
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
                       Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 2 
+            Map Operator Tree:
+                TableScan
+                  alias: srcpart
+                  Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE
                   Select Operator
                     expressions: ds (type: string)
                     outputColumnNames: ds
@@ -5701,7 +5705,7 @@ STAGE PLANS:
                         value expressions: _col0 (type: string)
             Execution mode: llap
             LLAP IO: no inputs
-        Map 6 
+        Map 7 
             Map Operator Tree:
                 TableScan
                   alias: srcpart
@@ -5721,7 +5725,7 @@ STAGE PLANS:
                         value expressions: _col0 (type: string)
             Execution mode: llap
             LLAP IO: no inputs
-        Reducer 2 
+        Reducer 3 
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
@@ -5742,7 +5746,7 @@ STAGE PLANS:
                     sort order: +
                     Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 4 + Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -5770,7 +5774,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 5 + Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -5785,7 +5789,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 + Reducer 8 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -5806,8 +5810,8 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Union 3 - Vertex: Union 3 + Union 4 + Vertex: Union 4 Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/partition_shared_scan.q.out ql/src/test/results/clientpositive/llap/partition_shared_scan.q.out new file mode 100644 index 0000000..34ba87c --- /dev/null +++ ql/src/test/results/clientpositive/llap/partition_shared_scan.q.out @@ -0,0 +1,293 @@ +PREHOOK: query: drop table foo +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table foo +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table foo (i int) partitioned by (s string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@foo +POSTHOOK: query: create table foo (i int) partitioned by (s string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@foo +PREHOOK: query: insert overwrite table foo partition(s='foo') select cint from alltypesorc limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@foo@s=foo +POSTHOOK: query: insert overwrite table foo partition(s='foo') select cint from alltypesorc limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@foo@s=foo +POSTHOOK: Lineage: foo PARTITION(s=foo).i SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +PREHOOK: query: insert overwrite table foo partition(s='bar') select cint from alltypesorc limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@foo@s=bar +POSTHOOK: query: insert overwrite table foo partition(s='bar') select cint from alltypesorc limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@foo@s=bar +POSTHOOK: Lineage: foo PARTITION(s=bar).i SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +PREHOOK: query: explain +select * +from foo f1 +join part p1 on (p1.p_partkey = f1.i) +join foo f2 on (f1.i = f2.i) +where f1.s='foo' and f2.s='bar' +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * +from foo f1 +join part p1 on (p1.p_partkey = f1.i) +join foo f2 on (f1.i = f2.i) +where f1.s='foo' and f2.s='bar' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: f1 + Statistics: 
Num rows: 10 Data size: 170 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: i is not null (type: boolean) + Statistics: Num rows: 10 Data size: 170 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 170 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 170 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: f2 + Statistics: Num rows: 10 Data size: 170 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: i is not null (type: boolean) + Statistics: Num rows: 10 Data size: 170 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 170 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 170 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 28 Data size: 17703 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 28 Data size: 17703 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: string), _col9 (type: double), _col10 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num 
rows: 30 Data size: 19473 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), 'foo' (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: string), _col9 (type: double), _col10 (type: string), _col11 (type: int), 'bar' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 30 Data size: 19473 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 19473 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select * +from foo f1 +join part p1 on (p1.p_partkey = f1.i) +join foo f2 on (f1.i = f2.i) +where f1.s='foo' and f2.s='foo' +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * +from foo f1 +join part p1 on (p1.p_partkey = f1.i) +join foo f2 on (f1.i = f2.i) +where f1.s='foo' and f2.s='foo' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: f1 + Statistics: Num rows: 10 Data size: 170 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: i is not null (type: boolean) + Statistics: Num rows: 10 Data size: 170 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 170 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 170 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: i is not null (type: boolean) + Statistics: Num rows: 10 Data size: 170 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 170 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 170 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: p_partkey is not null (type: boolean) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 26 Data size: 16094 
Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 28 Data size: 17703 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 28 Data size: 17703 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: string), _col9 (type: double), _col10 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 30 Data size: 19473 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), 'foo' (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: int), _col8 (type: string), _col9 (type: double), _col10 (type: string), _col11 (type: int), 'foo' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 30 Data size: 19473 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 19473 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: drop table foo +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@foo +PREHOOK: Output: default@foo +POSTHOOK: query: drop table foo +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@foo +POSTHOOK: Output: default@foo diff --git ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out index b9b8241..d9fc6b5 100644 --- ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out @@ -6444,10 +6444,10 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 4 <- Map 1 (BROADCAST_EDGE), Union 3 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) - Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE), Union 4 (CONTAINS) + Reducer 5 <- 
Map 1 (BROADCAST_EDGE), Union 4 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE), Union 4 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 @@ -6456,18 +6456,30 @@ STAGE PLANS: alias: srcpart filterExpr: ds is not null (type: boolean) Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ds is not null (type: boolean) + Select Operator + expressions: ds (type: string) + outputColumnNames: _col0 Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ds (type: string) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2000 Data size: 368000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 2 + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 2000 Data size: 389248 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ds (type: string) outputColumnNames: ds @@ -6491,7 +6503,7 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - Map 6 + Map 7 Map Operator Tree: TableScan alias: srcpart @@ -6519,7 +6531,7 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - Reducer 2 + Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -6547,7 +6559,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 4 + Reducer 5 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -6582,7 +6594,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 5 + Reducer 6 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -6604,7 +6616,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 + Reducer 8 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -6632,8 +6644,8 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - Union 3 - Vertex: Union 3 + Union 4 + Vertex: Union 4 Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/perf/query24.q.out ql/src/test/results/clientpositive/perf/query24.q.out index 3e43dad..105871f 100644 --- ql/src/test/results/clientpositive/perf/query24.q.out +++ ql/src/test/results/clientpositive/perf/query24.q.out @@ -98,20 +98,20 @@ POSTHOOK: type: QUERY Plan optimized by CBO. 
Vertex dependency in root stage -Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 22 (SIMPLE_EDGE) -Reducer 16 <- Map 23 (SIMPLE_EDGE), Reducer 15 (SIMPLE_EDGE) -Reducer 17 <- Map 24 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) -Reducer 18 <- Map 25 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) -Reducer 19 <- Map 26 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE) -Reducer 20 <- Reducer 19 (SIMPLE_EDGE) -Reducer 21 <- Reducer 20 (CUSTOM_SIMPLE_EDGE) -Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) -Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 12 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) -Reducer 6 <- Map 13 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 10 <- Map 18 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 11 <- Map 19 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) +Reducer 12 <- Map 17 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) +Reducer 13 <- Map 20 (SIMPLE_EDGE), Reducer 12 (SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (SIMPLE_EDGE) +Reducer 15 <- Reducer 14 (CUSTOM_SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE) +Reducer 3 <- Map 17 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 4 <- Map 18 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 19 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Map 20 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) -Reducer 8 <- Reducer 21 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Reducer 15 (CUSTOM_SIMPLE_EDGE), Reducer 7 (CUSTOM_SIMPLE_EDGE) +Reducer 9 <- Map 1 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE) Stage-0 Fetch Operator @@ -125,13 +125,13 @@ Stage-0 predicate:(_col3 > _col4) Merge Join Operator [MERGEJOIN_154] (rows=231911707 width=489) Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] - <-Reducer 21 [CUSTOM_SIMPLE_EDGE] + <-Reducer 15 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_87] Select Operator [SEL_85] (rows=1 width=400) Output:["_col0"] Group By Operator [GBY_84] (rows=1 width=400) Output:["_col0"],aggregations:["avg(VALUE._col0)"] - <-Reducer 20 [CUSTOM_SIMPLE_EDGE] + <-Reducer 14 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_83] Group By Operator [GBY_82] (rows=1 width=400) Output:["_col0"],aggregations:["avg(_col10)"] @@ -139,86 +139,86 @@ Stage-0 Output:["_col10"] Group By Operator [GBY_79] (rows=463823414 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, KEY._col8, KEY._col9 - <-Reducer 19 [SIMPLE_EDGE] + <-Reducer 13 [SIMPLE_EDGE] SHUFFLE [RS_78] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Group By Operator [GBY_77] (rows=927646829 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(_col4)"],keys:_col22, _col17, _col18, _col19, _col20, _col21, _col13, _col14, _col8, _col10 Merge Join Operator [MERGEJOIN_153] (rows=927646829 width=88) Conds:RS_73._col11, _col15=RS_74._col1, upper(_col2)(Inner),Output:["_col4","_col8","_col10","_col13","_col14","_col17","_col18","_col19","_col20","_col21","_col22"] - <-Map 26 [SIMPLE_EDGE] + <-Map 20 [SIMPLE_EDGE] SHUFFLE [RS_74] PartitionCols:_col1, upper(_col2) Select Operator [SEL_60] (rows=40000000 width=1014) Output:["_col0","_col1","_col2"] Filter Operator [FIL_143] (rows=40000000 width=1014) predicate:(ca_zip is not null and ca_country is not null) - 
TableScan [TS_58] (rows=40000000 width=1014) + TableScan [TS_15] (rows=40000000 width=1014) default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_state","ca_zip","ca_country"] - <-Reducer 18 [SIMPLE_EDGE] + <-Reducer 12 [SIMPLE_EDGE] SHUFFLE [RS_73] PartitionCols:_col11, _col15 Merge Join Operator [MERGEJOIN_152] (rows=843315281 width=88) Conds:RS_70._col0=RS_71._col0(Inner),Output:["_col4","_col8","_col10","_col11","_col13","_col14","_col15","_col17","_col18","_col19","_col20","_col21"] - <-Map 25 [SIMPLE_EDGE] + <-Map 17 [SIMPLE_EDGE] SHUFFLE [RS_71] PartitionCols:_col0 Select Operator [SEL_57] (rows=462000 width=1436) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] Filter Operator [FIL_142] (rows=462000 width=1436) predicate:i_item_sk is not null - TableScan [TS_55] (rows=462000 width=1436) + TableScan [TS_6] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_current_price","i_size","i_color","i_units","i_manager_id"] - <-Reducer 17 [SIMPLE_EDGE] + <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_70] PartitionCols:_col0 Merge Join Operator [MERGEJOIN_151] (rows=766650239 width=88) Conds:RS_67._col1=RS_68._col0(Inner),Output:["_col0","_col4","_col8","_col10","_col11","_col13","_col14","_col15"] - <-Map 24 [SIMPLE_EDGE] + <-Map 19 [SIMPLE_EDGE] SHUFFLE [RS_68] PartitionCols:_col0 Select Operator [SEL_54] (rows=80000000 width=860) Output:["_col0","_col1","_col2","_col3"] Filter Operator [FIL_141] (rows=80000000 width=860) predicate:(c_customer_sk is not null and c_birth_country is not null) - TableScan [TS_52] (rows=80000000 width=860) + TableScan [TS_12] (rows=80000000 width=860) default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_first_name","c_last_name","c_birth_country"] - <-Reducer 16 [SIMPLE_EDGE] + <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_67] PartitionCols:_col1 Merge Join Operator [MERGEJOIN_150] (rows=696954748 width=88) Conds:RS_64._col2=RS_65._col0(Inner),Output:["_col0","_col1","_col4","_col8","_col10","_col11"] - <-Map 23 [SIMPLE_EDGE] + <-Map 18 [SIMPLE_EDGE] SHUFFLE [RS_65] PartitionCols:_col0 Select Operator [SEL_51] (rows=852 width=1910) Output:["_col0","_col1","_col3","_col4"] Filter Operator [FIL_140] (rows=852 width=1910) predicate:((s_market_id = 7) and s_store_sk is not null and s_zip is not null) - TableScan [TS_49] (rows=1704 width=1910) + TableScan [TS_9] (rows=1704 width=1910) default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name","s_market_id","s_state","s_zip"] - <-Reducer 15 [SIMPLE_EDGE] + <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_64] PartitionCols:_col2 Merge Join Operator [MERGEJOIN_149] (rows=633595212 width=88) Conds:RS_61._col0, _col3=RS_62._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col4"] - <-Map 14 [SIMPLE_EDGE] + <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_61] PartitionCols:_col0, _col3 Select Operator [SEL_45] (rows=575995635 width=88) Output:["_col0","_col1","_col2","_col3","_col4"] Filter Operator [FIL_138] (rows=575995635 width=88) predicate:(ss_item_sk is not null and ss_ticket_number is not null and ss_store_sk is not null and ss_customer_sk is not null) - TableScan [TS_43] (rows=575995635 width=88) + TableScan [TS_0] (rows=575995635 width=88) default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_sales_price"] - <-Map 22 [SIMPLE_EDGE] + <-Map 16 [SIMPLE_EDGE] SHUFFLE [RS_62] PartitionCols:_col0, _col1 Select Operator [SEL_48] (rows=57591150 width=77) Output:["_col0","_col1"] 
Filter Operator [FIL_139] (rows=57591150 width=77) predicate:(sr_item_sk is not null and sr_ticket_number is not null) - TableScan [TS_46] (rows=57591150 width=77) + TableScan [TS_3] (rows=57591150 width=77) default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_item_sk","sr_ticket_number"] <-Reducer 7 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_86] @@ -237,57 +237,53 @@ Stage-0 Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["sum(_col4)"],keys:_col19, _col20, _col14, _col22, _col8, _col9, _col11, _col12, _col16 Merge Join Operator [MERGEJOIN_148] (rows=927646829 width=88) Conds:RS_30._col17, _col21=RS_31._col1, upper(_col2)(Inner),Output:["_col4","_col8","_col9","_col11","_col12","_col14","_col16","_col19","_col20","_col22"] - <-Map 13 [SIMPLE_EDGE] + <-Map 20 [SIMPLE_EDGE] SHUFFLE [RS_31] PartitionCols:_col1, upper(_col2) Select Operator [SEL_17] (rows=40000000 width=1014) Output:["_col0","_col1","_col2"] Filter Operator [FIL_137] (rows=40000000 width=1014) predicate:(ca_zip is not null and ca_country is not null) - TableScan [TS_15] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_state","ca_zip","ca_country"] + Please refer to the previous TableScan [TS_15] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_30] PartitionCols:_col17, _col21 Merge Join Operator [MERGEJOIN_147] (rows=843315281 width=88) Conds:RS_27._col1=RS_28._col0(Inner),Output:["_col4","_col8","_col9","_col11","_col12","_col14","_col16","_col17","_col19","_col20","_col21"] - <-Map 12 [SIMPLE_EDGE] + <-Map 19 [SIMPLE_EDGE] SHUFFLE [RS_28] PartitionCols:_col0 Select Operator [SEL_14] (rows=80000000 width=860) Output:["_col0","_col1","_col2","_col3"] Filter Operator [FIL_136] (rows=80000000 width=860) predicate:(c_customer_sk is not null and c_birth_country is not null) - TableScan [TS_12] (rows=80000000 width=860) - default@customer,customer,Tbl:COMPLETE,Col:NONE,Output:["c_customer_sk","c_first_name","c_last_name","c_birth_country"] + Please refer to the previous TableScan [TS_12] <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_27] PartitionCols:_col1 Merge Join Operator [MERGEJOIN_146] (rows=766650239 width=88) Conds:RS_24._col2=RS_25._col0(Inner),Output:["_col1","_col4","_col8","_col9","_col11","_col12","_col14","_col16","_col17"] - <-Map 11 [SIMPLE_EDGE] + <-Map 18 [SIMPLE_EDGE] SHUFFLE [RS_25] PartitionCols:_col0 Select Operator [SEL_11] (rows=852 width=1910) Output:["_col0","_col1","_col3","_col4"] Filter Operator [FIL_135] (rows=852 width=1910) predicate:((s_market_id = 7) and s_store_sk is not null and s_zip is not null) - TableScan [TS_9] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_store_name","s_market_id","s_state","s_zip"] + Please refer to the previous TableScan [TS_9] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col2 Merge Join Operator [MERGEJOIN_145] (rows=696954748 width=88) Conds:RS_21._col0=RS_22._col0(Inner),Output:["_col1","_col2","_col4","_col8","_col9","_col11","_col12"] - <-Map 10 [SIMPLE_EDGE] + <-Map 17 [SIMPLE_EDGE] SHUFFLE [RS_22] PartitionCols:_col0 Select Operator [SEL_8] (rows=231000 width=1436) Output:["_col0","_col1","_col2","_col4","_col5"] Filter Operator [FIL_134] (rows=231000 width=1436) predicate:((i_color = 'orchid') and i_item_sk is not null) - TableScan [TS_6] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_current_price","i_size","i_color","i_units","i_manager_id"] + Please 
refer to the previous TableScan [TS_6] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col0 @@ -300,15 +296,13 @@ Stage-0 Output:["_col0","_col1","_col2","_col3","_col4"] Filter Operator [FIL_132] (rows=575995635 width=88) predicate:(ss_item_sk is not null and ss_ticket_number is not null and ss_store_sk is not null and ss_customer_sk is not null) - TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_sales_price"] - <-Map 9 [SIMPLE_EDGE] + Please refer to the previous TableScan [TS_0] + <-Map 16 [SIMPLE_EDGE] SHUFFLE [RS_19] PartitionCols:_col0, _col1 Select Operator [SEL_5] (rows=57591150 width=77) Output:["_col0","_col1"] Filter Operator [FIL_133] (rows=57591150 width=77) predicate:(sr_item_sk is not null and sr_ticket_number is not null) - TableScan [TS_3] (rows=57591150 width=77) - default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_item_sk","sr_ticket_number"] + Please refer to the previous TableScan [TS_3]
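In the query24 plan above, the effect of the optimization is visible in the golden output itself: a scan that can be shared is printed once, and every later use points back to it with "Please refer to the previous TableScan [TS_x]", which is why the Map/Reducer vertices are renumbered and several Map vertices disappear. One way to picture the bookkeeping, as a rough self-contained sketch (hypothetical names, keyed by table plus pruned partitions; not SharedScanOptimizer's actual data structures, which operate on the operator DAG):

    import java.util.LinkedHashMap;
    import java.util.Map;
    import java.util.Set;

    public class ScanReuseSketch {

      // Hypothetical reuse key: same table and same fully known set of pruned partitions.
      record ScanKey(String table, Set<String> partitions) { }

      private final Map<ScanKey, String> firstScanByKey = new LinkedHashMap<>();
      private int nextId = 0;

      // Returns what the plan would print for this scan: a fresh TS id the first
      // time, and a back-reference when an equivalent scan was already emitted.
      String register(String table, Set<String> partitions) {
        ScanKey key = new ScanKey(table, partitions);
        String existing = firstScanByKey.get(key);
        if (existing != null) {
          return "Please refer to the previous TableScan [" + existing + "]";
        }
        String id = "TS_" + nextId++;
        firstScanByKey.put(key, id);
        return "TableScan [" + id + "] " + table;
      }

      public static void main(String[] args) {
        ScanReuseSketch plan = new ScanReuseSketch();
        // Two identical scans of an unpartitioned table collapse onto one.
        System.out.println(plan.register("store_sales", Set.of()));
        System.out.println(plan.register("store_sales", Set.of()));
        // Scans over different partitions of the same table stay separate.
        System.out.println(plan.register("foo", Set.of("s=foo")));
        System.out.println(plan.register("foo", Set.of("s=bar")));
      }
    }

The TS numbers in this sketch are arbitrary; in the real output they are the operator ids assigned during query compilation.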