diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java index 8207599..f210500 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java @@ -143,8 +143,15 @@ private FetchTask optimize(ParseContext pctx, String alias, TableScanOperator so } private boolean checkThreshold(FetchData data, int limit, ParseContext pctx) throws Exception { - if (limit > 0 && data.hasOnlyPruningFilter()) { - return true; + if (limit > 0) { + if (data.hasOnlyPruningFilter()) { + /* partitioned table + query has only pruning filters */ + return true; + } else if (data.isPartitioned() == false && data.isFiltered() == false) { + /* unpartitioned table + query has no filter operators */ + return true; + } + /* fall through */ } long threshold = HiveConf.getLongVar(pctx.getConf(), HiveConf.ConfVars.HIVEFETCHTASKCONVERSIONTHRESHOLD); @@ -228,6 +235,10 @@ private FetchData checkOperators(FetchData fetch, TableScanOperator ts, boolean if (op.getChildOperators() == null || op.getChildOperators().size() != 1) { return null; } + + if (op instanceof FilterOperator) { + fetch.setFiltered(true); + } } if (op instanceof FileSinkOperator) { @@ -279,6 +290,11 @@ private boolean isConvertible(FetchData fetch, Operator operator, Set operator, Set fileSink; + private boolean filtered; private FetchData(TableScanOperator scanOp, ReadEntity parent, Table table, SplitSample splitSample) { this.scanOp = scanOp; @@ -337,10 +354,23 @@ private FetchData(TableScanOperator scanOp, ReadEntity parent, Table table, Prun /* * all filters were executed during partition pruning */ - public boolean hasOnlyPruningFilter() { + public final boolean hasOnlyPruningFilter() { return this.onlyPruningFilter; } + public final boolean isPartitioned() { + return this.table.isPartitioned(); + } + + /* there are filter operators in 
the pipeline */ + public final boolean isFiltered() { + return this.filtered; + } + + public final void setFiltered(boolean filtered) { + this.filtered = filtered; + } + private FetchWork convertToWork() throws HiveException { inputs.clear(); if (!table.isPartitioned()) { diff --git ql/src/test/results/clientpositive/nonmr_fetch_threshold.q.out ql/src/test/results/clientpositive/nonmr_fetch_threshold.q.out index d7bd42b..46c22c0 100644 --- ql/src/test/results/clientpositive/nonmr_fetch_threshold.q.out +++ ql/src/test/results/clientpositive/nonmr_fetch_threshold.q.out @@ -125,34 +125,22 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select cast(key as int) * 10, upper(value) from src limit 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: (UDFToInteger(key) * 10) (type: int), upper(value) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: 10 Processor Tree: - ListSink + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (UDFToInteger(key) * 10) (type: int), upper(value) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num 
rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + ListSink