diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index b9f39fb..7b7559a 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -334,6 +334,7 @@ minitez.query.files=bucket_map_join_tez1.q,\ tez_join_tests.q,\ tez_joins_explain.q,\ tez_schema_evolution.q,\ + tez_self_join.q,\ tez_union.q,\ tez_union2.q,\ tez_union_dynamic_partition.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java index 0027960..b5ee4ef 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagate.java @@ -43,6 +43,7 @@ import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.Rule; import org.apache.hadoop.hive.ql.lib.RuleRegExp; +import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcCtx.ConstantPropagateOption; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; @@ -62,8 +63,15 @@ private static final Log LOG = LogFactory.getLog(ConstantPropagate.class); protected ParseContext pGraphContext; + private ConstantPropagateOption constantPropagateOption; - public ConstantPropagate() {} + public ConstantPropagate() { + this(ConstantPropagateOption.FULL); + } + + public ConstantPropagate(ConstantPropagateOption option) { + this.constantPropagateOption = option; + } /** * Transform the query tree. 
@@ -76,7 +84,7 @@ public ParseContext transform(ParseContext pactx) throws SemanticException { pGraphContext = pactx; // generate pruned column list for all relevant operators - ConstantPropagateProcCtx cppCtx = new ConstantPropagateProcCtx(); + ConstantPropagateProcCtx cppCtx = new ConstantPropagateProcCtx(constantPropagateOption); // create a walker which walks the tree in a DFS manner while maintaining // the operator stack. The dispatcher diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcCtx.java index 6bb2a09..f30e330 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcCtx.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcCtx.java @@ -43,16 +43,28 @@ */ public class ConstantPropagateProcCtx implements NodeProcessorCtx { + public enum ConstantPropagateOption { + FULL, // Do full constant propagation + SHORTCUT, // Only perform expression short-cutting - remove unnecessary AND/OR operators + // if one of the child conditions is true/false. 
+ }; + private static final org.apache.commons.logging.Log LOG = LogFactory .getLog(ConstantPropagateProcCtx.class); private final Map, Map> opToConstantExprs; private final List> opToDelete; + private ConstantPropagateOption constantPropagateOption = ConstantPropagateOption.FULL; public ConstantPropagateProcCtx() { + this(ConstantPropagateOption.FULL); + } + + public ConstantPropagateProcCtx(ConstantPropagateOption option) { opToConstantExprs = new HashMap, Map>(); opToDelete = new ArrayList>(); + this.constantPropagateOption = option; } public Map, Map> getOpToConstantExprs() { @@ -184,4 +196,13 @@ public void addOpToDelete(Operator op) { public List> getOpToDelete() { return opToDelete; } + + public ConstantPropagateOption getConstantPropagateOption() { + return constantPropagateOption; + } + + public void setConstantPropagateOption( + ConstantPropagateOption constantPropagateOption) { + this.constantPropagateOption = constantPropagateOption; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java index 4a4814d..f9df8e5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java @@ -48,6 +48,7 @@ import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcCtx.ConstantPropagateOption; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; @@ -193,6 +194,72 @@ public static ExprNodeDesc foldExpr(ExprNodeGenericFuncDesc funcDesc) { } return evaluateFunction(funcDesc.getGenericUDF(),funcDesc.getChildren(), funcDesc.getChildren()); } + 
+ /** + * Fold input expression desc, dispatching on the context's constant propagation option. + * + * @param desc expression to fold + * @param constants current propagated constant map + * @param cppCtx context whose option selects SHORTCUT-only folding; any other option folds fully + * @param op processing operator + * @param propagate if true, assignment expressions will be added to constants. + * @return the folded expression + * @throws UDFArgumentException + */ + private static ExprNodeDesc foldExpr(ExprNodeDesc desc, Map constants, + ConstantPropagateProcCtx cppCtx, Operator op, int tag, + boolean propagate) throws UDFArgumentException { + if (cppCtx.getConstantPropagateOption() == ConstantPropagateOption.SHORTCUT) { + return foldExprShortcut(desc, constants, cppCtx, op, tag, propagate); + } + return foldExprFull(desc, constants, cppCtx, op, tag, propagate); + } + + /** + * Fold input expression desc, only performing short-cutting. + * + * Unnecessary AND/OR operations involving a constant true/false value will be eliminated. + * + * @param desc expression to fold + * @param constants current propagated constant map + * @param cppCtx context handed on to the recursive foldExpr call made for each child + * @param op processing operator + * @param propagate if true, assignment expressions will be added to constants. + * @return the shortcut expression if one is found, otherwise the input desc + * @throws UDFArgumentException + */ + private static ExprNodeDesc foldExprShortcut(ExprNodeDesc desc, Map constants, + ConstantPropagateProcCtx cppCtx, Operator op, int tag, + boolean propagate) throws UDFArgumentException { + if (desc instanceof ExprNodeGenericFuncDesc) { + ExprNodeGenericFuncDesc funcDesc = (ExprNodeGenericFuncDesc) desc; + + GenericUDF udf = funcDesc.getGenericUDF(); + + boolean propagateNext = propagate && propagatableUdfs.contains(udf.getClass()); + List newExprs = new ArrayList(); + for (ExprNodeDesc childExpr : desc.getChildren()) { + newExprs.add(foldExpr(childExpr, constants, cppCtx, op, tag, propagateNext)); + } + + // Don't evaluate a nondeterministic function, since its value can only be computed at runtime.
+ if (!isDeterministicUdf(udf)) { + LOG.debug("Function " + udf.getClass() + " is undeterministic. Don't evalulating immediately."); + ((ExprNodeGenericFuncDesc) desc).setChildren(newExprs); + return desc; + } + + // Check if the function can be short cut. + ExprNodeDesc shortcut = shortcutFunction(udf, newExprs, op); + if (shortcut != null) { + LOG.debug("Folding expression:" + desc + " -> " + shortcut); + return shortcut; + } + ((ExprNodeGenericFuncDesc) desc).setChildren(newExprs); + } + return desc; + } + /** * Fold input expression desc. * @@ -211,7 +278,7 @@ public static ExprNodeDesc foldExpr(ExprNodeGenericFuncDesc funcDesc) { * @return fold expression * @throws UDFArgumentException */ - private static ExprNodeDesc foldExpr(ExprNodeDesc desc, Map constants, + private static ExprNodeDesc foldExprFull(ExprNodeDesc desc, Map constants, ConstantPropagateProcCtx cppCtx, Operator op, int tag, boolean propagate) throws UDFArgumentException { if (desc instanceof ExprNodeGenericFuncDesc) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java index a7cf8b7..08b28e2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java @@ -88,6 +88,11 @@ public void initialize(HiveConf hiveConf) { /* Add list bucketing pruner. 
*/ transformations.add(new ListBucketingPruner()); } + + if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTCONSTANTPROPAGATION)) { + // Run the constant propagation yet again, PPD may have created opportunities for folding + transformations.add(new ConstantPropagate()); + } } if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTGROUPBY) || diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java index 56707af..03d41f2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java @@ -62,6 +62,7 @@ import org.apache.hadoop.hive.ql.lib.RuleRegExp; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.optimizer.ConstantPropagate; +import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcCtx.ConstantPropagateOption; import org.apache.hadoop.hive.ql.optimizer.ConvertJoinMapJoin; import org.apache.hadoop.hive.ql.optimizer.DynamicPartitionPruningOptimization; import org.apache.hadoop.hive.ql.optimizer.MergeJoinProc; @@ -304,8 +305,10 @@ private void runDynamicPartitionPruning(OptimizeTezProcContext procCtx, Set