diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 2257cc14d5..0198c0f724 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -8595,7 +8595,7 @@ private Operator genUDTFPlan(GenericUDTF genericUDTF, String outputTableAlias, L @SuppressWarnings("nls") private Operator genLimitMapRedPlan(String dest, QB qb, Operator input, - int offset, int limit, boolean extraMRStep) throws SemanticException { + int offset, int limit, boolean extraMRStep) throws SemanticException { // A map-only job can be optimized - instead of converting it to a // map-reduce job, we can have another map // job to do the same to avoid the cost of sorting in the map-reduce phase. @@ -10924,23 +10924,23 @@ private Operator genPostGroupByBodyPlan(Operator curr, String dest, QB qb, if (limit != null) { // In case of order by, only 1 reducer is used, so no need of // another shuffle - curr = genLimitMapRedPlan(dest, qb, curr, offset.intValue(), - limit.intValue(), !hasOrderBy); + curr = genLimitMapRedPlan(dest, qb, curr, offset, + limit, limit != 0 && !hasOrderBy); } } else { // exact limit can be taken care of by the fetch operator if (limit != null) { boolean extraMRStep = true; - if (hasOrderBy || + if (limit == 0 || hasOrderBy || qb.getIsQuery() && qbp.getClusterByForClause(dest) == null && qbp.getSortByForClause(dest) == null) { extraMRStep = false; } - curr = genLimitMapRedPlan(dest, qb, curr, offset.intValue(), - limit.intValue(), extraMRStep); - qb.getParseInfo().setOuterQueryLimit(limit.intValue()); + curr = genLimitMapRedPlan(dest, qb, curr, offset, + limit, extraMRStep); + qb.getParseInfo().setOuterQueryLimit(limit); } if (!queryState.getHiveOperation().equals(HiveOperation.CREATEVIEW)) { curr = genFileSinkPlan(dest, qb, curr); diff --git a/ql/src/test/results/clientpositive/llap/insert_only_empty_query.q.out b/ql/src/test/results/clientpositive/llap/insert_only_empty_query.q.out index bfa72c59a3..cf0016c6c9 100644 --- a/ql/src/test/results/clientpositive/llap/insert_only_empty_query.q.out +++ b/ql/src/test/results/clientpositive/llap/insert_only_empty_query.q.out @@ -54,9 +54,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -72,28 +71,14 @@ STAGE PLANS: Number of rows: 0 Statistics: Num rows: 1 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - sort order: + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 1 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: int), _col2 (type: decimal(3,2)) + value expressions: _col0 (type: string), _col2 (type: decimal(3,2)) Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: decimal(3,2)) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 0 - Statistics: Num rows: 1 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 1 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col2 (type: decimal(3,2)) - Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator @@ -126,7 +111,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 1500 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) - Reducer 4 + Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/llap/llap_nullscan.q.out b/ql/src/test/results/clientpositive/llap/llap_nullscan.q.out index c76d790e49..1a24361de5 100644 --- a/ql/src/test/results/clientpositive/llap/llap_nullscan.q.out +++ b/ql/src/test/results/clientpositive/llap/llap_nullscan.q.out @@ -169,8 +169,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) - Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -190,6 +189,13 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: src_orc_n1 + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 @@ -198,11 +204,12 @@ STAGE PLANS: Number of rows: 0 Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - sort order: + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string) Execution mode: vectorized, llap - LLAP IO: all inputs + LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -221,21 +228,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 3 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 0 - Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/optimize_nullscan.q.out b/ql/src/test/results/clientpositive/llap/optimize_nullscan.q.out index 8945b7a478..5fe2395f61 100644 --- a/ql/src/test/results/clientpositive/llap/optimize_nullscan.q.out +++ b/ql/src/test/results/clientpositive/llap/optimize_nullscan.q.out @@ -165,8 +165,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -259,12 +258,13 @@ STAGE PLANS: Number of rows: 0 Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - null sort order: - sort order: + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 - value expressions: _col0 (type: string) - auto parallelism: false + tag: 1 + auto parallelism: true Execution mode: vectorized, llap LLAP IO: no inputs Path -> Alias: @@ -505,25 +505,6 @@ STAGE PLANS: TotalFiles: 1 GatherStats: false MultiFileSpray: false - Reducer 4 - Execution mode: vectorized, llap - Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 0 - Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE - tag: 1 - auto parallelism: true Stage: Stage-0 Fetch Operator @@ -979,7 +960,7 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### 0 2000 -Warning: Shuffle Join MERGEJOIN[15][tables = [a, b]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[12][tables = [a, b]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain extended select * from (select key from src where false) a left outer join (select value from srcpart limit 0) b PREHOOK: type: QUERY @@ -1009,8 +990,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1105,7 +1085,7 @@ STAGE PLANS: null sort order: sort order: Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: COMPLETE - tag: -1 + tag: 1 value expressions: _col0 (type: string) auto parallelism: false Execution mode: vectorized, llap @@ -1348,24 +1328,6 @@ STAGE PLANS: TotalFiles: 1 GatherStats: false MultiFileSpray: false - Reducer 4 - Execution mode: vectorized, llap - Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 0 - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: COMPLETE - tag: 1 - value expressions: _col0 (type: string) - auto parallelism: false Stage: Stage-0 Fetch Operator @@ -1373,7 +1335,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[15][tables = [a, b]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[12][tables = [a, b]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select * from (select key from src where false) a left outer join (select value from srcpart limit 0) b PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out b/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out index c366443f84..57ba67eb15 100644 --- a/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out +++ b/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out @@ -162,8 +162,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 3 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -255,11 +254,12 @@ STAGE PLANS: Number of rows: 0 Statistics: Num rows: 0 Data size: 0 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - null sort order: - sort order: + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - tag: -1 - value expressions: _col0 (type: string) + tag: 1 auto parallelism: false Execution mode: vectorized Path -> Alias: @@ -498,25 +498,6 @@ STAGE PLANS: TotalFiles: 1 GatherStats: false MultiFileSpray: false - Reducer 4 - Execution mode: vectorized - Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Limit - Number of rows: 0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - tag: 1 - auto parallelism: false Stage: Stage-0 Fetch Operator @@ -967,7 +948,7 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 #### A masked pattern was here #### 0 2000 -Warning: Shuffle Join JOIN[11][tables = [a, b]] in Work 'Reducer 2' is a cross product +Warning: Shuffle Join JOIN[8][tables = [a, b]] in Work 'Reducer 2' is a cross product PREHOOK: query: explain extended select * from (select key from src where false) a left outer join (select value from srcpart limit 0) b PREHOOK: type: QUERY @@ -996,8 +977,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 4 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Map 3 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1091,7 +1071,7 @@ STAGE PLANS: null sort order: sort order: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - tag: -1 + tag: 1 value expressions: _col0 (type: string) auto parallelism: false Execution mode: vectorized @@ -1331,24 +1311,6 @@ STAGE PLANS: TotalFiles: 1 GatherStats: false MultiFileSpray: false - Reducer 4 - Execution mode: vectorized - Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Limit - Number of rows: 0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - tag: 1 - value expressions: _col0 (type: string) - auto parallelism: false Stage: Stage-0 Fetch Operator @@ -1356,7 +1318,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[11][tables = [a, b]] in Work 'Reducer 2' is a cross product +Warning: Shuffle Join JOIN[8][tables = [a, b]] in Work 'Reducer 2' is a cross product PREHOOK: query: select * from (select key from src where false) a left outer join (select value from srcpart limit 0) b PREHOOK: type: QUERY PREHOOK: Input: default@src