diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lib/RuleStringExp.java b/ql/src/java/org/apache/hadoop/hive/ql/lib/RuleStringExp.java new file mode 100644 index 0000000..b85ebed --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/lib/RuleStringExp.java @@ -0,0 +1,66 @@ +package org.apache.hadoop.hive.ql.lib; + +import java.util.Stack; + +import org.apache.hadoop.hive.ql.parse.SemanticException; + +public class RuleStringExp implements Rule { + + private final String ruleName; + private final String pattern; + + /** + * The rule specified as operator names separated by % symbols, the left side represents the + * bottom of the stack. + * + * E.g. TS%FIL%RS -> means + * TableScan Node followed by Filter followed by ReduceSink in the tree, or, in terms of the + * stack, ReduceSink on top followed by Filter followed by TableScan + * + * @param ruleName + * name of the rule + * @param pattern + * string specification of the rule + **/ + public RuleStringExp(String ruleName, String pattern) { + this.ruleName = ruleName; + this.pattern = pattern; + } + + /** + * Returns the cost of the rule for the specified stack: the length of the pattern if the + * string of stack node names matches the pattern exactly, otherwise -1. + * + * A match against only a proper substack of the stack still yields -1; a positive cost is + * returned only when the entire stack matches the rule exactly. + * + * @param stack + * Node stack encountered so far + * @return cost of the function + * @throws SemanticException + */ + public int cost(Stack stack) throws SemanticException { + int numElems = (stack != null ? 
stack.size() : 0); + String name = new String(); + int patLen = pattern.length(); + for (int pos = numElems - 1; pos >= 0; pos--) { + name = stack.get(pos).getName() + "%" + name; + if (name.length() >= patLen) { + if (pattern.equals(name)) { + return patLen; + } else { + return -1; + } + } + } + + return -1; + } + + /** + * @return the name of the rule + **/ + public String getName() { + return ruleName; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationOptimizer.java index c1f1519..5299bb0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationOptimizer.java @@ -56,7 +56,7 @@ import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; import org.apache.hadoop.hive.ql.lib.Rule; -import org.apache.hadoop.hive.ql.lib.RuleRegExp; +import org.apache.hadoop.hive.ql.lib.RuleStringExp; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.optimizer.MapJoinProcessor; import org.apache.hadoop.hive.ql.optimizer.Transform; @@ -218,7 +218,7 @@ public ParseContext transform(ParseContext pctx) throws SemanticException { // detect correlations CorrelationNodeProcCtx corrCtx = new CorrelationNodeProcCtx(pCtx); Map opRules = new LinkedHashMap(); - opRules.put(new RuleRegExp("R1", ReduceSinkOperator.getOperatorName() + "%"), + opRules.put(new RuleStringExp("R1", ReduceSinkOperator.getOperatorName() + "%"), new CorrelationNodeProc()); Dispatcher disp = new DefaultRuleDispatcher(getDefaultProc(), opRules, corrCtx); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java index 7b5f9b2..4241837 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java @@ -42,7 +42,7 @@ import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; import org.apache.hadoop.hive.ql.lib.Rule; -import org.apache.hadoop.hive.ql.lib.RuleRegExp; +import org.apache.hadoop.hive.ql.lib.RuleStringExp; import org.apache.hadoop.hive.ql.optimizer.Transform; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; @@ -83,12 +83,12 @@ public ParseContext transform(ParseContext pctx) throws SemanticException { // If multiple rules can be matched with same cost, last rule will be choosen as a processor // see DefaultRuleDispatcher#dispatch() Map opRules = new LinkedHashMap(); - opRules.put(new RuleRegExp("R1", RS + "%.*%" + RS + "%"), + opRules.put(new RuleStringExp("R1", RS + "%.*%" + RS + "%"), ReduceSinkDeduplicateProcFactory.getReducerReducerProc()); - opRules.put(new RuleRegExp("R2", RS + "%" + GBY + "%.*%" + RS + "%"), + opRules.put(new RuleStringExp("R2", RS + "%" + GBY + "%.*%" + RS + "%"), ReduceSinkDeduplicateProcFactory.getGroupbyReducerProc()); if (mergeJoins) { - opRules.put(new RuleRegExp("R3", JOIN + "%.*%" + RS + "%"), + opRules.put(new RuleStringExp("R3", JOIN + "%.*%" + RS + "%"), ReduceSinkDeduplicateProcFactory.getJoinReducerProc()); } // TODO RS+JOIN diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/ExprProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/ExprProcFactory.java index c930b80..d275c66 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/ExprProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/ExprProcFactory.java @@ -42,7 +42,7 @@ import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; import 
org.apache.hadoop.hive.ql.lib.Rule; -import org.apache.hadoop.hive.ql.lib.RuleRegExp; +import org.apache.hadoop.hive.ql.lib.RuleStringExp; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; @@ -185,12 +185,12 @@ public static Dependency getExprDependency(LineageCtx lctx, // generates the plan from the operator tree Map exprRules = new LinkedHashMap(); exprRules.put( - new RuleRegExp("R1", ExprNodeColumnDesc.class.getName() + "%"), + new RuleStringExp("R1", ExprNodeColumnDesc.class.getName() + "%"), getColumnProcessor()); exprRules.put( - new RuleRegExp("R2", ExprNodeFieldDesc.class.getName() + "%"), + new RuleStringExp("R2", ExprNodeFieldDesc.class.getName() + "%"), getFieldProcessor()); - exprRules.put(new RuleRegExp("R3", ExprNodeGenericFuncDesc.class.getName() + exprRules.put(new RuleStringExp("R3", ExprNodeGenericFuncDesc.class.getName() + "%"), getGenericFuncProcessor()); // The dispatcher fires the processor corresponding to the closest matching diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/AnnotateWithOpTraits.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/AnnotateWithOpTraits.java index c304e97..1c51b1b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/AnnotateWithOpTraits.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/metainfo/annotation/AnnotateWithOpTraits.java @@ -39,7 +39,7 @@ import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.PreOrderWalker; import org.apache.hadoop.hive.ql.lib.Rule; -import org.apache.hadoop.hive.ql.lib.RuleRegExp; +import org.apache.hadoop.hive.ql.lib.RuleStringExp; import org.apache.hadoop.hive.ql.optimizer.Transform; import org.apache.hadoop.hive.ql.optimizer.metainfo.annotation.OpTraitsRulesProcFactory; import org.apache.hadoop.hive.ql.parse.ParseContext; @@ -58,25 
+58,25 @@ public ParseContext transform(ParseContext pctx) throws SemanticException { // create a walker which walks the tree in a DFS manner while maintaining the // operator stack. The dispatcher generates the plan from the operator tree Map opRules = new LinkedHashMap(); - opRules.put(new RuleRegExp("TS", TableScanOperator.getOperatorName() + "%"), + opRules.put(new RuleStringExp("TS", TableScanOperator.getOperatorName() + "%"), OpTraitsRulesProcFactory.getTableScanRule()); - opRules.put(new RuleRegExp("RS", ReduceSinkOperator.getOperatorName() + "%"), + opRules.put(new RuleStringExp("RS", ReduceSinkOperator.getOperatorName() + "%"), OpTraitsRulesProcFactory.getReduceSinkRule()); - opRules.put(new RuleRegExp("JOIN", JoinOperator.getOperatorName() + "%"), + opRules.put(new RuleStringExp("JOIN", JoinOperator.getOperatorName() + "%"), OpTraitsRulesProcFactory.getJoinRule()); - opRules.put(new RuleRegExp("MAPJOIN", MapJoinOperator.getOperatorName() + "%"), + opRules.put(new RuleStringExp("MAPJOIN", MapJoinOperator.getOperatorName() + "%"), OpTraitsRulesProcFactory.getMultiParentRule()); - opRules.put(new RuleRegExp("SMB", SMBMapJoinOperator.getOperatorName() + "%"), + opRules.put(new RuleStringExp("SMB", SMBMapJoinOperator.getOperatorName() + "%"), OpTraitsRulesProcFactory.getMultiParentRule()); - opRules.put(new RuleRegExp("MUX", MuxOperator.getOperatorName() + "%"), + opRules.put(new RuleStringExp("MUX", MuxOperator.getOperatorName() + "%"), OpTraitsRulesProcFactory.getMultiParentRule()); - opRules.put(new RuleRegExp("DEMUX", DemuxOperator.getOperatorName() + "%"), + opRules.put(new RuleStringExp("DEMUX", DemuxOperator.getOperatorName() + "%"), OpTraitsRulesProcFactory.getMultiParentRule()); - opRules.put(new RuleRegExp("UNION", UnionOperator.getOperatorName() + "%"), + opRules.put(new RuleStringExp("UNION", UnionOperator.getOperatorName() + "%"), OpTraitsRulesProcFactory.getMultiParentRule()); - opRules.put(new RuleRegExp("GBY", GroupByOperator.getOperatorName() 
+ "%"), + opRules.put(new RuleStringExp("GBY", GroupByOperator.getOperatorName() + "%"), OpTraitsRulesProcFactory.getGroupByRule()); - opRules.put(new RuleRegExp("SEL", SelectOperator.getOperatorName() + "%"), + opRules.put(new RuleStringExp("SEL", SelectOperator.getOperatorName() + "%"), OpTraitsRulesProcFactory.getSelectRule()); // The dispatcher fires the processor corresponding to the closest matching diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java index d5102bc..32e2757 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java @@ -34,7 +34,7 @@ import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; import org.apache.hadoop.hive.ql.lib.Rule; -import org.apache.hadoop.hive.ql.lib.RuleRegExp; +import org.apache.hadoop.hive.ql.lib.RuleStringExp; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.metadata.Table; @@ -437,12 +437,12 @@ public static NodeInfoWrapper walkExprTree( Map exprRules = new LinkedHashMap(); exprRules.put( - new RuleRegExp("R1", ExprNodeColumnDesc.class.getName() + "%"), + new RuleStringExp("R1", ExprNodeColumnDesc.class.getName() + "%"), getColumnProcessor()); exprRules.put( - new RuleRegExp("R2", ExprNodeFieldDesc.class.getName() + "%"), + new RuleStringExp("R2", ExprNodeFieldDesc.class.getName() + "%"), getFieldProcessor()); - exprRules.put(new RuleRegExp("R5", ExprNodeGenericFuncDesc.class.getName() + exprRules.put(new RuleStringExp("R5", ExprNodeGenericFuncDesc.class.getName() + "%"), getGenericFuncProcessor()); // The dispatcher fires the processor corresponding to the closest matching diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java 
b/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java index 3a07b17..268a3bf 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java @@ -39,6 +39,7 @@ import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; import org.apache.hadoop.hive.ql.lib.Rule; import org.apache.hadoop.hive.ql.lib.RuleRegExp; +import org.apache.hadoop.hive.ql.lib.RuleStringExp; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; @@ -268,12 +269,12 @@ public static ExprWalkerInfo extractPushdownPreds(OpWalkerInfo opContext, // generates the plan from the operator tree Map exprRules = new LinkedHashMap(); exprRules.put( - new RuleRegExp("R1", ExprNodeColumnDesc.class.getName() + "%"), + new RuleStringExp("R1", ExprNodeColumnDesc.class.getName() + "%"), getColumnProcessor()); exprRules.put( - new RuleRegExp("R2", ExprNodeFieldDesc.class.getName() + "%"), + new RuleStringExp("R2", ExprNodeFieldDesc.class.getName() + "%"), getFieldProcessor()); - exprRules.put(new RuleRegExp("R3", ExprNodeGenericFuncDesc.class.getName() + exprRules.put(new RuleStringExp("R3", ExprNodeGenericFuncDesc.class.getName() + "%"), getGenericFuncProcessor()); // The dispatcher fires the processor corresponding to the closest matching diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java index 7f26f0f..a76f7a4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java @@ -42,6 +42,7 @@ import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.Rule; import org.apache.hadoop.hive.ql.lib.RuleRegExp; +import org.apache.hadoop.hive.ql.lib.RuleStringExp; import 
org.apache.hadoop.hive.ql.optimizer.Transform; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; @@ -92,34 +93,34 @@ public ParseContext transform(ParseContext pctx) throws SemanticException { OpWalkerInfo opWalkerInfo = new OpWalkerInfo(pGraphContext); Map opRules = new LinkedHashMap(); - opRules.put(new RuleRegExp("R1", + opRules.put(new RuleStringExp("R1", FilterOperator.getOperatorName() + "%"), OpProcFactory.getFilterProc()); - opRules.put(new RuleRegExp("R2", + opRules.put(new RuleStringExp("R2", PTFOperator.getOperatorName() + "%"), OpProcFactory.getPTFProc()); - opRules.put(new RuleRegExp("R3", + opRules.put(new RuleStringExp("R3", CommonJoinOperator.getOperatorName() + "%"), OpProcFactory.getJoinProc()); - opRules.put(new RuleRegExp("R4", + opRules.put(new RuleStringExp("R4", TableScanOperator.getOperatorName() + "%"), OpProcFactory.getTSProc()); - opRules.put(new RuleRegExp("R5", + opRules.put(new RuleStringExp("R5", ScriptOperator.getOperatorName() + "%"), OpProcFactory.getSCRProc()); - opRules.put(new RuleRegExp("R6", + opRules.put(new RuleStringExp("R6", LimitOperator.getOperatorName() + "%"), OpProcFactory.getLIMProc()); - opRules.put(new RuleRegExp("R7", + opRules.put(new RuleStringExp("R7", UDTFOperator.getOperatorName() + "%"), OpProcFactory.getUDTFProc()); - opRules.put(new RuleRegExp("R8", + opRules.put(new RuleStringExp("R8", LateralViewForwardOperator.getOperatorName() + "%"), OpProcFactory.getLVFProc()); - opRules.put(new RuleRegExp("R9", + opRules.put(new RuleStringExp("R9", LateralViewJoinOperator.getOperatorName() + "%"), OpProcFactory.getLVJProc()); - opRules.put(new RuleRegExp("R10", + opRules.put(new RuleStringExp("R10", ReduceSinkOperator.getOperatorName() + "%"), OpProcFactory.getRSProc()); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicateTransitivePropagate.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicateTransitivePropagate.java index 
ea1f713..fd7ffdf 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicateTransitivePropagate.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicateTransitivePropagate.java @@ -43,6 +43,7 @@ import org.apache.hadoop.hive.ql.lib.PreOrderWalker; import org.apache.hadoop.hive.ql.lib.Rule; import org.apache.hadoop.hive.ql.lib.RuleRegExp; +import org.apache.hadoop.hive.ql.lib.RuleStringExp; import org.apache.hadoop.hive.ql.optimizer.Transform; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; @@ -65,7 +66,7 @@ public ParseContext transform(ParseContext pctx) throws SemanticException { pGraphContext = pctx; Map opRules = new LinkedHashMap(); - opRules.put(new RuleRegExp("R1", "(" + + opRules.put(new RuleStringExp("R1", "(" + FilterOperator.getOperatorName() + "%" + ReduceSinkOperator.getOperatorName() + "%" + JoinOperator.getOperatorName() + "%)"), new JoinTransitive()); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java index 363e49e..252ca07 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/SyntheticJoinPredicate.java @@ -46,6 +46,7 @@ import org.apache.hadoop.hive.ql.lib.PreOrderWalker; import org.apache.hadoop.hive.ql.lib.Rule; import org.apache.hadoop.hive.ql.lib.RuleRegExp; +import org.apache.hadoop.hive.ql.lib.RuleStringExp; import org.apache.hadoop.hive.ql.optimizer.Transform; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; @@ -73,7 +74,7 @@ public ParseContext transform(ParseContext pctx) throws SemanticException { } Map opRules = new LinkedHashMap(); - opRules.put(new RuleRegExp("R1", "(" + + opRules.put(new RuleStringExp("R1", "(" + TableScanOperator.getOperatorName() + "%" + ".*" + ReduceSinkOperator.getOperatorName() + "%" + 
JoinOperator.getOperatorName() + "%)"), new JoinSynthetic());