diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java index 4239c90..734c126 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java @@ -210,11 +210,30 @@ private FetchData checkTree(boolean aggressive, ParseContext pctx, String alias, bypassFilter = !pctx.getPrunedPartitions(alias, ts).hasUnknownPartitions(); } } - if (!aggressive && !bypassFilter) { + + boolean onlyPruningFilter = bypassFilter; + Operator op = ts; + while (onlyPruningFilter) { + if (op instanceof FileSinkOperator || op.getChildOperators() == null) { + break; + } else if (op.getChildOperators().size() != 1) { + onlyPruningFilter = false; + break; + } else { + op = op.getChildOperators().get(0); + } + + if (op instanceof FilterOperator) { + onlyPruningFilter = false; + } + } + + if (!aggressive && !onlyPruningFilter) { return null; } + PrunedPartitionList partitions = pctx.getPrunedPartitions(alias, ts); - FetchData fetch = new FetchData(ts, parent, table, partitions, splitSample, bypassFilter); + FetchData fetch = new FetchData(ts, parent, table, partitions, splitSample, onlyPruningFilter); return checkOperators(fetch, aggressive, bypassFilter); } diff --git ql/src/test/queries/clientpositive/nonmr_fetch.q ql/src/test/queries/clientpositive/nonmr_fetch.q index dffdc2c..c766d67 100644 --- ql/src/test/queries/clientpositive/nonmr_fetch.q +++ ql/src/test/queries/clientpositive/nonmr_fetch.q @@ -13,6 +13,10 @@ select * from srcpart where ds='2008-04-08' AND hr='11' limit 10; explain select key from src limit 10; select key from src limit 10; +-- negative, filter on partition column and non-partition column +explain select * from srcpart where ds='2008-04-08' AND key > 100 limit 10; +select * from srcpart where ds='2008-04-08' AND key > 100 limit 10; + -- negative, filter on non-partition column explain select * from srcpart where key > 100 limit 10; select * from srcpart where key > 100 limit 10; diff --git ql/src/test/results/clientpositive/nonmr_fetch.q.out ql/src/test/results/clientpositive/nonmr_fetch.q.out index 5607089..7fa562b 100644 --- ql/src/test/results/clientpositive/nonmr_fetch.q.out +++ ql/src/test/results/clientpositive/nonmr_fetch.q.out @@ -126,32 +126,105 @@ POSTHOOK: Input: default@src 278 98 484 +PREHOOK: query: explain select * from srcpart where ds='2008-04-08' AND key > 100 limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from srcpart where ds='2008-04-08' AND key > 100 limit 10 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key) > 100.0) (type: boolean) + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), '2008-04-08' (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select * from srcpart where ds='2008-04-08' AND key > 100 limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select * from srcpart where ds='2008-04-08' AND key > 100 limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +238 val_238 2008-04-08 11 +311 val_311 2008-04-08 11 +165 val_165 2008-04-08 11 +409 val_409 2008-04-08 11 +255 val_255 2008-04-08 11 +278 val_278 2008-04-08 11 +484 val_484 2008-04-08 11 +265 val_265 2008-04-08 11 +193 val_193 2008-04-08 11 +401 val_401 2008-04-08 11 PREHOOK: query: explain select * from srcpart where key > 100 limit 10 PREHOOK: type: QUERY POSTHOOK: query: explain select * from srcpart where key > 100 limit 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key) > 100.0) (type: boolean) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: 10 Processor Tree: - TableScan - alias: srcpart - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (UDFToDouble(key) > 100.0) (type: boolean) - Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string), ds (type: string), hr (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - ListSink + ListSink PREHOOK: query: select * from srcpart where key > 100 limit 10 PREHOOK: type: QUERY