diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java index 8207599..f210500 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java @@ -143,8 +143,15 @@ private FetchTask optimize(ParseContext pctx, String alias, TableScanOperator so } private boolean checkThreshold(FetchData data, int limit, ParseContext pctx) throws Exception { - if (limit > 0 && data.hasOnlyPruningFilter()) { - return true; + if (limit > 0) { + if (data.hasOnlyPruningFilter()) { + /* partitioned table + query has only pruning filters */ + return true; + } else if (data.isPartitioned() == false && data.isFiltered() == false) { + /* unpartitioned table + query has no filter operators */ + return true; + } + /* fall through */ } long threshold = HiveConf.getLongVar(pctx.getConf(), HiveConf.ConfVars.HIVEFETCHTASKCONVERSIONTHRESHOLD); @@ -228,6 +235,10 @@ private FetchData checkOperators(FetchData fetch, TableScanOperator ts, boolean if (op.getChildOperators() == null || op.getChildOperators().size() != 1) { return null; } + + if (op instanceof FilterOperator) { + fetch.setFiltered(true); + } } if (op instanceof FileSinkOperator) { @@ -279,6 +290,11 @@ private boolean isConvertible(FetchData fetch, Operator operator, Set operator, Set fileSink; + private boolean filtered; private FetchData(TableScanOperator scanOp, ReadEntity parent, Table table, SplitSample splitSample) { this.scanOp = scanOp; @@ -337,10 +354,23 @@ private FetchData(TableScanOperator scanOp, ReadEntity parent, Table table, Prun /* * all filters were executed during partition pruning */ - public boolean hasOnlyPruningFilter() { + public final boolean hasOnlyPruningFilter() { return this.onlyPruningFilter; } + public final boolean isPartitioned() { + return this.table.isPartitioned(); + } + + /* there are filter operators in 
the pipeline */ + public final boolean isFiltered() { + return this.filtered; + } + + public final void setFiltered(boolean filtered) { + this.filtered = filtered; + } + private FetchWork convertToWork() throws HiveException { inputs.clear(); if (!table.isPartitioned()) { diff --git ql/src/test/queries/clientpositive/nonmr_fetch_threshold.q ql/src/test/queries/clientpositive/nonmr_fetch_threshold.q index 959212b..26f6f5b 100644 --- ql/src/test/queries/clientpositive/nonmr_fetch_threshold.q +++ ql/src/test/queries/clientpositive/nonmr_fetch_threshold.q @@ -7,9 +7,13 @@ set hive.fetch.task.conversion.threshold=10000; explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10; explain select cast(key as int) * 10, upper(value) from src limit 10; +-- Scans without limit (should be Fetch task now) +explain select concat(key, value) from src; set hive.fetch.task.conversion.threshold=100; -- from HIVE-7397, limit + partition pruning filter explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10; explain select cast(key as int) * 10, upper(value) from src limit 10; +-- Scans without limit (should not be Fetch task now) +explain select concat(key, value) from src; diff --git ql/src/test/results/clientpositive/nonmr_fetch_threshold.q.out ql/src/test/results/clientpositive/nonmr_fetch_threshold.q.out index d7bd42b..6bfc624 100644 --- ql/src/test/results/clientpositive/nonmr_fetch_threshold.q.out +++ ql/src/test/results/clientpositive/nonmr_fetch_threshold.q.out @@ -94,6 +94,29 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE ListSink +PREHOOK: query: -- Scans without limit (should be Fetch task now) +explain select concat(key, value) from src +PREHOOK: type: QUERY +POSTHOOK: query: -- Scans without limit (should be Fetch task now) +explain select concat(key, value) from src +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + 
limit: -1 + Processor Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: concat(key, value) (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + ListSink + PREHOOK: query: -- from HIVE-7397, limit + partition pruning filter explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10 PREHOOK: type: QUERY @@ -125,6 +148,32 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select cast(key as int) * 10, upper(value) from src limit 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (UDFToInteger(key) * 10) (type: int), upper(value) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: -- Scans without limit (should not be Fetch task now) +explain select concat(key, value) from src +PREHOOK: type: QUERY +POSTHOOK: query: -- Scans without limit (should not be Fetch task now) +explain select concat(key, value) from src +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -136,23 +185,20 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: (UDFToInteger(key) * 10) (type: int), upper(value) (type: string) - outputColumnNames: _col0, _col1 + expressions: concat(key, value) (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - 
Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: 10 + limit: -1 Processor Tree: ListSink