diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java
index c856623..e45f6bd 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java
@@ -44,9 +44,9 @@
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
 import org.apache.hadoop.hive.ql.io.ContentSummaryInputFormat;
 import org.apache.hadoop.hive.ql.io.HiveInputFormat;
-import org.apache.hadoop.hive.ql.metadata.InputEstimator;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
+import org.apache.hadoop.hive.ql.metadata.InputEstimator;
 import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
@@ -55,13 +55,25 @@
 import org.apache.hadoop.hive.ql.parse.QB;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.parse.SplitSample;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.ql.plan.FetchWork;
 import org.apache.hadoop.hive.ql.plan.ListSinkDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.PartitionDesc;
 import org.apache.hadoop.hive.ql.plan.PlanUtils;
+import org.apache.hadoop.hive.ql.plan.SelectDesc;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToBinary;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToChar;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToDate;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToDecimal;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUtcTimestamp;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToVarchar;
 
 import org.apache.hadoop.mapred.InputFormat;
 import org.apache.hadoop.mapred.JobConf;
@@ -73,9 +85,11 @@
 
   private final Log LOG = LogFactory.getLog(SimpleFetchOptimizer.class.getName());
 
+  @Override
   public ParseContext transform(ParseContext pctx) throws SemanticException {
     Map<String, Operator<? extends OperatorDesc>> topOps = pctx.getTopOps();
-    if (pctx.getQB().isSimpleSelectQuery() && topOps.size() == 1) {
+    if (pctx.getQB().getIsQuery() && !pctx.getQB().getParseInfo().isAnalyzeCommand()
+        && topOps.size() == 1) {
       // no join, no groupby, no distinct, no lateral view, no subq,
       // no CTAS or insert, not analyze command, and single sourced.
       String alias = (String) pctx.getTopOps().keySet().toArray()[0];
@@ -156,7 +170,7 @@ private FetchData checkTree(boolean aggressive, ParseContext pctx, String alias,
       return null;
     }
 
-    Table table = qb.getMetaData().getAliasToTable().get(alias);
+    Table table = pctx.getTopToTable().get(ts);
     if (table == null) {
       return null;
     }
@@ -181,34 +195,71 @@ private FetchData checkTree(boolean aggressive, ParseContext pctx, String alias,
     return null;
   }
 
-  private FetchData checkOperators(FetchData fetch, TableScanOperator ts, boolean aggresive,
+  private FetchData checkOperators(FetchData fetch, TableScanOperator ts, boolean aggressive,
       boolean bypassFilter) {
     if (ts.getChildOperators().size() != 1) {
      return null;
     }
     Operator<?> op = ts.getChildOperators().get(0);
     for (; ; op = op.getChildOperators().get(0)) {
-      if (aggresive) {
-        if (!(op instanceof LimitOperator || op instanceof FilterOperator
-            || op instanceof SelectOperator)) {
+      if (op instanceof SelectOperator) {
+        if (!aggressive) {
+          if (!checkExpressions((SelectOperator) op)) {
+            break;
+          }
+        }
+        continue;
+      }
+
+      if (aggressive) {
+        if (!(op instanceof LimitOperator || op instanceof FilterOperator)) {
           break;
         }
-      } else if (!(op instanceof LimitOperator || (op instanceof FilterOperator && bypassFilter)
-          || (op instanceof SelectOperator && ((SelectOperator) op).getConf().isSelectStar()))) {
+      } else if (!(op instanceof LimitOperator || (op instanceof FilterOperator && bypassFilter))) {
        break;
       }
+
       if (op.getChildOperators() == null || op.getChildOperators().size() != 1) {
         return null;
       }
     }
+
     if (op instanceof FileSinkOperator) {
       fetch.scanOp = ts;
       fetch.fileSink = op;
       return fetch;
     }
+
     return null;
   }
 
+  private boolean checkExpressions(SelectOperator op) {
+    SelectDesc desc = op.getConf();
+    for (ExprNodeDesc expr : desc.getColList()) {
+      if (!checkExpression(expr)) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  private boolean checkExpression(ExprNodeDesc expr) {
+    if (expr instanceof ExprNodeConstantDesc || expr instanceof ExprNodeColumnDesc) {
+      return true;
+    }
+
+    if (expr instanceof ExprNodeGenericFuncDesc) {
+      GenericUDF udf = ((ExprNodeGenericFuncDesc) expr).getGenericUDF();
+      if (udf instanceof GenericUDFToBinary || udf instanceof GenericUDFToChar
+          || udf instanceof GenericUDFToDate || udf instanceof GenericUDFToDecimal
+          || udf instanceof GenericUDFToUnixTimeStamp || udf instanceof GenericUDFToUtcTimestamp
+          || udf instanceof GenericUDFToVarchar) {
+        return expr.getChildren().size() == 1 && checkExpression(expr.getChildren().get(0));
+      }
+    }
+    return false;
+  }
+
   private class FetchData {
 
     private final ReadEntity parent;
@@ -240,7 +291,7 @@ private FetchData(ReadEntity parent, Table table, PrunedPartitionList partsList,
       this.splitSample = splitSample;
       this.onlyPruningFilter = bypassFilter;
     }
-    
+
     /*
      * all filters were executed during partition pruning
      */