diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java index af74fff..9e207cf 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java @@ -37,6 +37,7 @@ import org.apache.hadoop.hive.ql.exec.FetchTask; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.FilterOperator; +import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.exec.LimitOperator; import org.apache.hadoop.hive.ql.exec.ListSinkOperator; import org.apache.hadoop.hive.ql.exec.Operator; @@ -72,6 +73,7 @@ import org.apache.hadoop.hive.ql.plan.SelectDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFInFile; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToBinary; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToChar; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToDate; @@ -141,12 +143,14 @@ private FetchTask optimize(ParseContext pctx, String alias, TableScanOperator so } private boolean checkThreshold(FetchData data, int limit, ParseContext pctx) throws Exception { - if (limit > 0) { - if (data.hasOnlyPruningFilter()) { - /* partitioned table + query has only pruning filters */ + if (data.hasOnlyPruningFilter() + || (data.isPartitioned() == false && data.isFiltered() == false)) { + /* partitioned table + query has only pruning filters */ + /* unpartitioned table + no filters */ + if (limit > 0) { return true; - } else if (data.isPartitioned() == false && data.isFiltered() == false) { - /* unpartitioned table + no filters */ + } else if(data.hasUserFunctions() == false) { + // no UDFs, allow execution locally return true; } /* fall through */ @@ -219,7 +223,7 @@ private FetchData 
checkOperators(FetchData fetch, TableScanOperator ts, boolean Operator op = ts.getChildOperators().get(0); for (; ; op = op.getChildOperators().get(0)) { if (op instanceof SelectOperator) { - if (!checkExpressions((SelectOperator) op)) { + if (!checkExpressions((SelectOperator) op, false)) { return null; } continue; @@ -246,20 +250,20 @@ private FetchData checkOperators(FetchData fetch, TableScanOperator ts, boolean return null; } - private boolean checkExpressions(SelectOperator op) { + private boolean checkExpressions(SelectOperator op, boolean allowBuiltins) { SelectDesc desc = op.getConf(); if (desc.isSelectStar() || desc.isSelStarNoCompute()) { return true; } for (ExprNodeDesc expr : desc.getColList()) { - if (!checkExpression(expr)) { + if (!checkExpression(expr, allowBuiltins)) { return false; } } return true; } - private boolean checkExpression(ExprNodeDesc expr) { + private boolean checkExpression(ExprNodeDesc expr, boolean allowBuiltins) { if (expr instanceof ExprNodeConstantDesc || expr instanceof ExprNodeColumnDesc) { return true; @@ -267,16 +271,37 @@ private boolean checkExpression(ExprNodeDesc expr) { if (expr instanceof ExprNodeGenericFuncDesc) { GenericUDF udf = ((ExprNodeGenericFuncDesc) expr).getGenericUDF(); - if (udf instanceof GenericUDFToBinary || udf instanceof GenericUDFToChar + boolean isBuiltin = isGoodBuiltinUDF((ExprNodeGenericFuncDesc)expr); + boolean isCast = (udf instanceof GenericUDFToBinary || udf instanceof GenericUDFToChar || udf instanceof GenericUDFToDate || udf instanceof GenericUDFToDecimal || udf instanceof GenericUDFToUnixTimeStamp || udf instanceof GenericUDFToUtcTimestamp - || udf instanceof GenericUDFToVarchar) { - return expr.getChildren().size() == 1 && checkExpression(expr.getChildren().get(0)); + || udf instanceof GenericUDFToVarchar); + if(isCast) { + return expr.getChildren().size() == 1 && checkExpression(expr.getChildren().get(0), allowBuiltins); + } else if (isBuiltin && allowBuiltins) { + for (ExprNodeDesc 
child : expr.getChildren()) { + if (!checkExpression(child, allowBuiltins)) { + return false; + } + } + return true; } } return false; } + private boolean isGoodBuiltinUDF(ExprNodeGenericFuncDesc expr) { + if (!FunctionRegistry.isBuiltInFuncExpr(expr)) { + return false; + } + + if (expr.getGenericUDF() instanceof GenericUDFInFile) { + // only known bad builtin - JOINs are faster + return false; + } + return true; + } + private boolean isConvertible(FetchData fetch) { return isConvertible(fetch, fetch.scanOp, new HashSet>()); } @@ -291,6 +316,12 @@ private boolean isConvertible(FetchData fetch, Operator operator, Set operator, Set fileSink; private boolean filtered; + private boolean udfs; private FetchData(TableScanOperator scanOp, ReadEntity parent, Table table, SplitSample splitSample) { this.scanOp = scanOp; @@ -367,6 +399,14 @@ public final void setFiltered(boolean filtered) { this.filtered = filtered; } + public final void setUserFunctions(boolean udfs) { + this.udfs = udfs; /* honor the argument so the flag can also be cleared */ + } + + public final boolean hasUserFunctions() { + return this.udfs; + } + private FetchWork convertToWork() throws HiveException { inputs.clear(); TableDesc tableDesc = Utilities.getTableDesc(table);