diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java index 906dadf..8efd958 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java @@ -136,8 +136,15 @@ private FetchTask optimize(ParseContext pctx, String alias, TableScanOperator so } private boolean checkThreshold(FetchData data, int limit, ParseContext pctx) throws Exception { - if (limit > 0 && data.hasOnlyPruningFilter()) { - return true; + if (limit > 0) { + if (data.hasOnlyPruningFilter()) { + /* partitioned table + query has only pruning filters */ + return true; + } else if (data.isPartitioned() == false && data.isFiltered() == false) { + /* unpartitioned table + no filters in the query */ + return true; + } + /* fall through */ } long threshold = HiveConf.getLongVar(pctx.getConf(), HiveConf.ConfVars.HIVEFETCHTASKCONVERSIONTHRESHOLD); @@ -222,6 +229,10 @@ private FetchData checkOperators(FetchData fetch, TableScanOperator ts, boolean if (op.getChildOperators() == null || op.getChildOperators().size() != 1) { return null; } + + if (op instanceof FilterOperator) { + fetch.setFiltered(true); + } } if (op instanceof FileSinkOperator) { @@ -274,6 +285,7 @@ private boolean checkExpression(ExprNodeDesc expr) { // this is always non-null when conversion is completed private Operator fileSink; + private boolean filtered; private FetchData(ReadEntity parent, Table table, SplitSample splitSample) { this.parent = parent; @@ -295,10 +307,23 @@ private FetchData(ReadEntity parent, Table table, PrunedPartitionList partsList, /* * all filters were executed during partition pruning */ - public boolean hasOnlyPruningFilter() { + public final boolean hasOnlyPruningFilter() { return this.onlyPruningFilter; } + public final boolean isPartitioned() { + return this.table.isPartitioned(); + } + + /* there are filter 
operators in the pipeline */ + public final boolean isFiltered() { + return this.filtered; + } + + public final void setFiltered(boolean filtered) { + this.filtered = filtered; + } + private FetchWork convertToWork() throws HiveException { inputs.clear(); if (!table.isPartitioned()) { diff --git ql/src/test/queries/clientpositive/nonmr_fetch_threshold.q ql/src/test/queries/clientpositive/nonmr_fetch_threshold.q index b1a7cb5..5756aec 100644 --- ql/src/test/queries/clientpositive/nonmr_fetch_threshold.q +++ ql/src/test/queries/clientpositive/nonmr_fetch_threshold.q @@ -2,9 +2,13 @@ set hive.fetch.task.conversion=more; explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10; explain select cast(key as int) * 10, upper(value) from src limit 10; +-- Scans without limit (should be Fetch task now) +explain select concat(key, value) from src; set hive.fetch.task.conversion.threshold=100; -- from HIVE-7397, limit + partition pruning filter explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10; explain select cast(key as int) * 10, upper(value) from src limit 10; +-- Scans without limit (should not be Fetch task now) +explain select concat(key, value) from src; diff --git ql/src/test/results/clientpositive/nonmr_fetch_threshold.q.out ql/src/test/results/clientpositive/nonmr_fetch_threshold.q.out index cb0d332..ca7d2d9 100644 --- ql/src/test/results/clientpositive/nonmr_fetch_threshold.q.out +++ ql/src/test/results/clientpositive/nonmr_fetch_threshold.q.out @@ -46,6 +46,29 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE ListSink +PREHOOK: query: -- Scans without limit (should be Fetch task now) +explain select concat(key, value) from src +PREHOOK: type: QUERY +POSTHOOK: query: -- Scans without limit (should be Fetch task now) +explain select concat(key, value) from src +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + 
limit: -1 + Processor Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: concat(key, value) (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + ListSink + PREHOOK: query: -- from HIVE-7397, limit + partition pruning filter explain select * from srcpart where ds='2008-04-08' AND hr='11' limit 10 PREHOOK: type: QUERY @@ -77,6 +100,32 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select cast(key as int) * 10, upper(value) from src limit 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (UDFToInteger(key) * 10) (type: int), upper(value) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: -- Scans without limit (should not be Fetch task now) +explain select concat(key, value) from src +PREHOOK: type: QUERY +POSTHOOK: query: -- Scans without limit (should not be Fetch task now) +explain select concat(key, value) from src +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -88,23 +137,20 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: (UDFToInteger(key) * 10) (type: int), upper(value) (type: string) - outputColumnNames: _col0, _col1 + expressions: concat(key, value) (type: string) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - 
Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator - limit: 10 + limit: -1 Processor Tree: ListSink