diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java index efbd85806d..d5a2460c66 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java @@ -217,6 +217,11 @@ private void pushRankLimit(PTFOperator ptfOp, OpWalkerInfo owi) throws SemanticE } WindowTableFunctionDef wTFn = (WindowTableFunctionDef) conf.getFuncDef(); + if (wTFn.getPartition() != null) { + // Not applicable if over clause contains partition columns + return; + } + List rFnIdxs = rankingFunctions(wTFn); if ( rFnIdxs.size() == 0 ) { diff --git a/ql/src/test/queries/clientpositive/perf/query70subq.q b/ql/src/test/queries/clientpositive/perf/query70subq.q new file mode 100644 index 0000000000..0f6ed1a2e9 --- /dev/null +++ b/ql/src/test/queries/clientpositive/perf/query70subq.q @@ -0,0 +1,8 @@ +set hive.explain.user=false; +set hive.tez.dynamic.partition.pruning=false; +set hive.auto.convert.join.noconditionaltask=true; +set hive.auto.convert.join.noconditionaltask.size=1431655765; + + +explain +select s_state, ranking from (select s_state as s_state, sum(ss_net_profit), rank() over ( partition by s_state order by sum(ss_net_profit) desc) as ranking from store_sales, store, date_dim where d_month_seq between 1193 and 1193+11 and date_dim.d_date_sk = store_sales.ss_sold_date_sk and store.s_store_sk = store_sales.ss_store_sk group by s_state ) tmp1 where ranking <= 5; diff --git a/ql/src/test/results/clientpositive/perf/tez/query70subq.q.out b/ql/src/test/results/clientpositive/perf/tez/query70subq.q.out new file mode 100644 index 0000000000..b212640ccc --- /dev/null +++ b/ql/src/test/results/clientpositive/perf/tez/query70subq.q.out @@ -0,0 +1,190 @@ +PREHOOK: query: explain +select s_state, ranking from (select s_state as s_state, sum(ss_net_profit), rank() over ( partition by s_state order by sum(ss_net_profit) desc) as ranking from store_sales, store, date_dim where d_month_seq between 1193 and 1193+11 and date_dim.d_date_sk = store_sales.ss_sold_date_sk and store.s_store_sk = store_sales.ss_store_sk group by s_state ) tmp1 where ranking <= 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_dim +PREHOOK: Input: default@store +PREHOOK: Input: default@store_sales +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: explain +select s_state, ranking from (select s_state as s_state, sum(ss_net_profit), rank() over ( partition by s_state order by sum(ss_net_profit) desc) as ranking from store_sales, store, date_dim where d_month_seq between 1193 and 1193+11 and date_dim.d_date_sk = store_sales.ss_sold_date_sk and store.s_store_sk = store_sales.ss_store_sk group by s_state ) tmp1 where ranking <= 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_dim +POSTHOOK: Input: default@store +POSTHOOK: Input: default@store_sales +POSTHOOK: Output: hdfs://### HDFS PATH ### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: store_sales + filterExpr: (ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 575995635 Data size: 66010002860 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) + Statistics: Num rows: 525329897 Data size: 60203629900 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ss_sold_date_sk (type: int), ss_store_sk (type: int), ss_net_profit (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 525329897 Data size: 60203629900 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 525329897 Data size: 60203629900 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: decimal(7,2)) + Execution mode: vectorized + Map 6 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: (d_month_seq BETWEEN 1193 AND 1204 and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 584392 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (d_month_seq BETWEEN 1193 AND 1204 and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 317 Data size: 2536 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 317 Data size: 1268 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 317 Data size: 1268 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Map 7 + Map Operator Tree: + TableScan + alias: store + filterExpr: s_store_sk is not null (type: boolean) + Statistics: Num rows: 1704 Data size: 153360 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: s_store_sk is not null (type: boolean) + Statistics: Num rows: 1704 Data size: 153360 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: s_store_sk (type: int), s_state (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1704 Data size: 153360 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1704 Data size: 153360 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 525329897 Data size: 58196867052 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 525329897 Data size: 58196867052 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: decimal(7,2)) + Reducer 3 + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col2, _col5 + Statistics: Num rows: 525329897 Data size: 101368452054 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col2) + keys: _col5 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 19404 Data size: 3841992 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 19404 Data size: 3841992 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(17,2)) + Reducer 4 + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 49 Data size: 9702 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: decimal(17,2)) + sort order: +- + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 49 Data size: 9702 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 5 + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: decimal(17,2)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 49 Data size: 9702 Basic stats: COMPLETE Column stats: COMPLETE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: decimal(17,2) + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 DESC NULLS LAST + partition by: _col0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 49 Data size: 9702 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (rank_window_0 <= 5) (type: boolean) + Statistics: Num rows: 16 Data size: 3168 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), rank_window_0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 16 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink +