diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index a8411c9edb2f2db84cf2540deb20133c36152103..57d6b210fb66f6a47dd30eabd03072a6a5347f36 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1072,6 +1072,7 @@ "Whether to push predicates down into storage handlers. Ignored when hive.optimize.ppd is false."), // Constant propagation optimizer HIVEOPTCONSTANTPROPAGATION("hive.optimize.constant.propagation", true, "Whether to enable constant propagation optimizer"), + HIVEIDENTITYPROJECTREMOVER("hive.optimize.remove.identity.project", true, "Removes identity project from operator tree"), HIVEMETADATAONLYQUERIES("hive.optimize.metadataonly", true, ""), HIVENULLSCANOPTIMIZE("hive.optimize.null.scan", true, "Dont scan relations which are guaranteed to not generate any rows"), HIVEOPTPPD_STORAGE("hive.optimize.ppd.storage", true, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java index 95d2d76c80aa59b62e9464f704523d921302d401..dabe2a8d1cdf6a3b57a3fb34a9791a824e6b22e7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/SelectOperator.java @@ -138,12 +138,6 @@ public boolean isIdentitySelect() { return false; } - //Select * - if(this.getConf().isSelStarNoCompute() || - this.getConf().isSelectStar()) { - return true; - } - //Check whether the have the same schema if(!OperatorUtils.sameRowSchema(this, this.getParentOperators().get(0))) { return false; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IdentityProjectRemover.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IdentityProjectRemover.java new file mode 100644 index 0000000000000000000000000000000000000000..60ef9dd8c65fcd466d441655310d48000e839af0 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/IdentityProjectRemover.java @@ -0,0 +1,101 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.optimizer; + +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Stack; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.LateralViewForwardOperator; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.SelectOperator; +import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; +import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; +import org.apache.hadoop.hive.ql.lib.GraphWalker; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.lib.NodeProcessor; +import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.lib.Rule; +import org.apache.hadoop.hive.ql.lib.RuleRegExp; +import org.apache.hadoop.hive.ql.parse.ParseContext; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; + +/** This optimization tries to remove {@link SelectOperator} from tree which don't do any + * processing except forwarding columns from its parent to its children. + * e.g., select * from (select * from src where key = value) t1 join (select * from src where key = value) t2; + * Query tree + * + * Without this optimization: + * + * TS -> FIL -> SEL -> RS -> + * JOIN -> SEL -> FS + * TS -> FIL -> SEL -> RS -> + * + * With this optimization + * + * TS -> FIL -> RS -> + * JOIN -> FS + * TS -> FIL -> RS -> + * + * Note absence of select operator after filter and after join operator. + * Also, see : identity_proj_remove.q + */ +public class IdentityProjectRemover implements Transform { + + private static final Log LOG = LogFactory.getLog(IdentityProjectRemover.class); + @Override + public ParseContext transform(ParseContext pctx) throws SemanticException { + Map opRules = new LinkedHashMap(); + opRules.put(new RuleRegExp("R1", + "(" + SelectOperator.getOperatorName() + "%)"), new ProjectRemover()); + GraphWalker ogw = new DefaultGraphWalker(new DefaultRuleDispatcher(null, opRules, null)); + ArrayList topNodes = new ArrayList(); + topNodes.addAll(pctx.getTopOps().values()); + ogw.startWalking(topNodes, null); + return pctx; + } + + private static class ProjectRemover implements NodeProcessor { + + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + + SelectOperator sel = (SelectOperator)nd; + List> parents = sel.getParentOperators(); + if (parents.size() != 1 || parents.get(0) instanceof LateralViewForwardOperator) { + // Multi parents, cant handle that. + // Right now, we do not remove projection on top of + // LateralViewForward operators. + return null; + } + Operator parent = parents.get(0); + if(sel.isIdentitySelect()) { + parent.removeChildAndAdoptItsChildren(sel); + LOG.debug("Identity project remover optimization removed : " + sel); + } + return null; + } + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java index 5be0e4540a6843c6b40cb5c22db6e90e1f0da922..95b678b76b6621f933795c3560468b9ec3141d0d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java @@ -134,6 +134,9 @@ public void initialize(HiveConf hiveConf) { transformations.add(new ReduceSinkDeDuplication()); } transformations.add(new NonBlockingOpDeDupProc()); + if(HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEIDENTITYPROJECTREMOVER)) { + transformations.add(new IdentityProjectRemover()); + } if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVELIMITOPTENABLE)) { transformations.add(new GlobalLimitOptimizer()); }