diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 8bff2a9..1f1b7c4 100644
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -804,9 +804,9 @@
     HIVEOUTERJOINSUPPORTSFILTERS("hive.outerjoin.supports.filters", true),
 
     // 'minimal', 'more' (and 'all' later)
-    HIVEFETCHTASKCONVERSION("hive.fetch.task.conversion", "minimal",
+    HIVEFETCHTASKCONVERSION("hive.fetch.task.conversion", "more",
         new StringsValidator("minimal", "more")),
-    HIVEFETCHTASKCONVERSIONTHRESHOLD("hive.fetch.task.conversion.threshold", -1l),
+    HIVEFETCHTASKCONVERSIONTHRESHOLD("hive.fetch.task.conversion.threshold", 1024*1024*1024),
     HIVEFETCHTASKAGGR("hive.fetch.task.aggr", false),
diff --git conf/hive-default.xml.template conf/hive-default.xml.template
index 4944dfc..93d46fe 100644
--- conf/hive-default.xml.template
+++ conf/hive-default.xml.template
@@ -2054,7 +2054,7 @@
 
 <property>
   <name>hive.fetch.task.conversion</name>
-  <value>minimal</value>
+  <value>more</value>
   <description>
     Some select queries can be converted to single FETCH task minimizing latency.
     Currently the query should be single sourced not having any subquery and should not have
@@ -2066,7 +2066,7 @@
 
 <property>
   <name>hive.fetch.task.conversion.threshold</name>
-  <value>-1</value>
+  <value>1073741824</value>
   <description>
     Input threshold for applying hive.fetch.task.conversion. If target table is native, input length
     is calculated by summation of file lengths. If it's not native, storage handler for the table
diff --git data/conf/hive-site.xml data/conf/hive-site.xml
index 1c9c598..02cc122 100644
--- data/conf/hive-site.xml
+++ data/conf/hive-site.xml
@@ -221,4 +221,9 @@
   <value>false</value>
 </property>
 
+<property>
+  <name>hive.fetch.task.conversion</name>
+  <value>minimal</value>
+</property>
+
 </configuration>
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java
index 7413d2b..1e45a25 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java
@@ -106,9 +106,9 @@ private FetchTask optimize(ParseContext pctx, String alias, TableScanOperator so
     String mode = HiveConf.getVar(
         pctx.getConf(), HiveConf.ConfVars.HIVEFETCHTASKCONVERSION);
     boolean aggressive = "more".equals(mode);
+    final int limit = pctx.getQB().getParseInfo().getOuterQueryLimit();
     FetchData fetch = checkTree(aggressive, pctx, alias, source);
-    if (fetch != null && checkThreshold(fetch, pctx)) {
-      int limit = pctx.getQB().getParseInfo().getOuterQueryLimit();
+    if (fetch != null && checkThreshold(fetch, limit, pctx)) {
       FetchWork fetchWork = fetch.convertToWork();
       FetchTask fetchTask = (FetchTask) TaskFactory.get(fetchWork, pctx.getConf());
       fetchWork.setSink(fetch.completed(pctx, fetchWork));
@@ -119,12 +119,15 @@
     return null;
   }
 
-  private boolean checkThreshold(FetchData data, ParseContext pctx) throws Exception {
+  private boolean checkThreshold(FetchData data, int limit, ParseContext pctx) throws Exception {
     long threshold = HiveConf.getLongVar(pctx.getConf(),
         HiveConf.ConfVars.HIVEFETCHTASKCONVERSIONTHRESHOLD);
     if (threshold < 0) {
       return true;
     }
+    if (limit > 0 && data.hasOnlyPruningFilter()) {
+      return true;
+    }
     long remaining = threshold;
     remaining -= data.getInputLength(pctx, remaining);
     if (remaining < 0) {
@@ -169,7 +172,7 @@ private FetchData checkTree(boolean aggressive, ParseContext pctx, String alias,
       PrunedPartitionList pruned = pctx.getPrunedPartitions(alias, ts);
       if (aggressive || !pruned.hasUnknownPartitions()) {
         bypassFilter &= !pruned.hasUnknownPartitions();
-        return checkOperators(new FetchData(parent, table, pruned, splitSample), ts,
+        return checkOperators(new FetchData(parent, table, pruned, splitSample, bypassFilter), ts,
             aggressive, bypassFilter);
       }
     }
@@ -211,6 +214,7 @@ private FetchData checkOperators(FetchData fetch, TableScanOperator ts, boolean
     private final SplitSample splitSample;
     private final PrunedPartitionList partsList;
     private final HashSet<ReadEntity> inputs = new HashSet<ReadEntity>();
+    private final boolean onlyPruningFilter;
 
     // source table scan
     private TableScanOperator scanOp;
@@ -223,14 +227,23 @@ private FetchData(ReadEntity parent, Table table, SplitSample splitSample) {
       this.table = table;
       this.partsList = null;
       this.splitSample = splitSample;
+      this.onlyPruningFilter = false;
     }
 
     private FetchData(ReadEntity parent, Table table, PrunedPartitionList partsList,
-        SplitSample splitSample) {
+        SplitSample splitSample, boolean bypassFilter) {
       this.parent = parent;
       this.table = table;
       this.partsList = partsList;
       this.splitSample = splitSample;
+      this.onlyPruningFilter = bypassFilter;
+    }
+
+    /*
+     * all filters were executed during partition pruning
+     */
+    public boolean hasOnlyPruningFilter() {
+      return this.onlyPruningFilter;
     }
 
     private FetchWork convertToWork() throws HiveException {
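
Taken together, the patch flips the hive.fetch.task.conversion default from "minimal" to "more", replaces the unbounded threshold default (-1) with 1 GB, and lets a query bypass the threshold entirely when it has an outer LIMIT and all of its filters were consumed by partition pruning; the test hive-site.xml pins the old "minimal" value so existing test output stays stable. Below is a minimal, self-contained sketch of the resulting decision, not Hive source: the class name, the condensed checkThreshold signature, and the inputLength/onlyPruning parameters are illustrative stand-ins for SimpleFetchOptimizer.checkThreshold(), FetchData.getInputLength(), and FetchData.hasOnlyPruningFilter(); only the logic mirrors the diff above.

// Standalone sketch (not Hive source): condenses the patched conversion decision.
// All names below are illustrative; only the control flow mirrors the diff above.
public class FetchConversionSketch {

  /**
   * Decide whether a simple SELECT may run as a single local FETCH task.
   *
   * @param threshold   hive.fetch.task.conversion.threshold (new default 1 GB; negative disables)
   * @param limit       the outer query LIMIT (non-positive when absent)
   * @param onlyPruning true when every filter was consumed by partition pruning
   * @param inputLength total input size in bytes (stand-in for FetchData.getInputLength)
   */
  static boolean checkThreshold(long threshold, int limit, boolean onlyPruning, long inputLength) {
    if (threshold < 0) {
      return true;                    // threshold check disabled
    }
    if (limit > 0 && onlyPruning) {
      return true;                    // bounded output, no residual filters: input size is irrelevant
    }
    return inputLength <= threshold;  // otherwise convert only sufficiently small inputs
  }

  public static void main(String[] args) {
    long oneGb = 1024L * 1024 * 1024;
    // A 5 GB partition-pruned scan with LIMIT 10 still converts under the new rule.
    System.out.println(checkThreshold(oneGb, 10, true, 5 * oneGb));   // true
    // Without a LIMIT the same scan falls back to the size check and is rejected.
    System.out.println(checkThreshold(oneGb, -1, true, 5 * oneGb));   // false
  }
}

The LIMIT-plus-pruning bypass is what keeps a query such as SELECT * FROM t WHERE dt = '2014-07-01' LIMIT 10 running as a local fetch even when the pruned partitions far exceed the 1 GB default, since the fetch stops as soon as the limited rows are produced.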