diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/LateralViewJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/LateralViewJoinOperator.java index 2fbb81b..8735d8c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/LateralViewJoinOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/LateralViewJoinOperator.java @@ -78,8 +78,8 @@ // The expected tags from the parent operators. See processOp() before // changing the tags. - public static final int SELECT_TAG = 0; - public static final int UDTF_TAG = 1; + public static final byte SELECT_TAG = 0; + public static final byte UDTF_TAG = 1; @Override protected void initializeOp(Configuration hconf) throws HiveException { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java index c378dc7..246d199 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java @@ -55,6 +55,10 @@ public class ReduceSinkOperator extends TerminalOperator implements Serializable, TopNHash.BinaryCollector { + static { + PTFUtils.makeTransient(ReduceSinkOperator.class, "inputAliases"); + } + private static final long serialVersionUID = 1L; protected transient OutputCollector out; @@ -84,14 +88,14 @@ protected transient byte[] tagByte = new byte[1]; transient protected int numDistributionKeys; transient protected int numDistinctExprs; - transient String inputAlias; // input alias of this RS for join (used for PPD) + transient String[] inputAliases; // input aliases of this RS for join (used for PPD) - public void setInputAlias(String inputAlias) { - this.inputAlias = inputAlias; + public void setInputAliases(String[] inputAliases) { + this.inputAliases = inputAliases; } - public String getInputAlias() { - return inputAlias; + public String[] getInputAliases() { + return inputAliases; } public void setOutputCollector(OutputCollector _out) { diff --git 
ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java index 0798470..0690fb7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java @@ -474,39 +474,38 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx ctx, List cols = cppCtx.genColLists(op); Map colExprMap = op.getColumnExprMap(); - // As columns go down the DAG, the LVJ will transform internal column // names from something like 'key' to '_col0'. Because of this, we need // to undo this transformation using the column expression map as the // column names propagate up the DAG. - List colsAfterReplacement = new ArrayList(); - for (String col : cols) { - if (colExprMap.containsKey(col)) { - ExprNodeDesc expr = colExprMap.get(col); - colsAfterReplacement.addAll(expr.getCols()); - } else { - colsAfterReplacement.add(col); - } - } + // this is SEL(*) cols + UDTF cols List outputCols = op.getConf().getOutputInternalColNames(); - if (outputCols.size() != cols.size()) { - // cause we cannot prune columns from UDTF branch currently, extract - // columns from SEL(*) branch only and append all columns from UDTF branch to it - ArrayList newColNames = new ArrayList(); - for (String col : cols) { - int index = outputCols.indexOf(col); - // colExprMap.size() == size of cols from SEL(*) branch - if (index >= 0 && index < colExprMap.size()) { - newColNames.add(col); - } + + // because we cannot prune columns from UDTF branch currently, extract + // columns from SEL(*) branch only and append all columns from UDTF branch to it + int numSelColumns = op.getConf().getNumSelColumns(); + + List colsAfterReplacement = new ArrayList(); + ArrayList newColNames = new ArrayList(); + for (String col : cols) { + int index = outputCols.indexOf(col); + // numSelColumns == number of cols from SEL(*) branch (colExprMap now also covers UDTF cols) + if (index >= 0 
&& index < numSelColumns) { + ExprNodeDesc transformed = colExprMap.get(col); + Utilities.mergeUniqElems(colsAfterReplacement, transformed.getCols()); + newColNames.add(col); } - newColNames.addAll(outputCols.subList(colExprMap.size(), outputCols.size())); - op.getConf().setOutputInternalColNames(newColNames); } + // update number of columns from sel(*) + op.getConf().setNumSelColumns(newColNames.size()); - cppCtx.getPrunedColLists().put(op, - colsAfterReplacement); + // add all UDTF columns + // following SEL will do CP for columns from UDTF, not adding SEL in here + newColNames.addAll(outputCols.subList(numSelColumns, outputCols.size())); + op.getConf().setOutputInternalColNames(newColNames); + + cppCtx.getPrunedColLists().put(op, colsAfterReplacement); return null; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index d0a0ec7..8b7d987 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -6309,7 +6309,7 @@ private Operator genJoinOperatorChildren(QBJoinTree join, Operator left, @SuppressWarnings("nls") private Operator genJoinReduceSinkChild(QB qb, QBJoinTree joinTree, - Operator child, String srcName, int pos) throws SemanticException { + Operator child, String[] srcs, int pos) throws SemanticException { RowResolver inputRS = opParseCtx.get(child).getRowResolver(); RowResolver outputRS = new RowResolver(); ArrayList outputColumns = new ArrayList(); @@ -6369,7 +6369,7 @@ private Operator genJoinReduceSinkChild(QB qb, QBJoinTree joinTree, reduceKeys.size(), numReds), new RowSchema(outputRS .getColumnInfos()), child), outputRS); rsOp.setColumnExprMap(colExprMap); - rsOp.setInputAlias(srcName); + rsOp.setInputAliases(srcs); return rsOp; } @@ -6388,7 +6388,8 @@ private Operator genJoinOperator(QB qb, QBJoinTree joinTree, for (ASTNode cond : filter) { joinSrcOp = 
genFilterPlan(qb, cond, joinSrcOp); } - joinSrcOp = genJoinReduceSinkChild(qb, joinTree, joinSrcOp, null, 0); + + joinSrcOp = genJoinReduceSinkChild(qb, joinTree, joinSrcOp, joinTree.getLeftAliases(), 0); } Operator[] srcOps = new Operator[joinTree.getBaseSrc().length]; @@ -6420,7 +6421,7 @@ private Operator genJoinOperator(QB qb, QBJoinTree joinTree, } // generate a ReduceSink operator for the join - srcOps[pos] = genJoinReduceSinkChild(qb, joinTree, srcOp, src, pos); + srcOps[pos] = genJoinReduceSinkChild(qb, joinTree, srcOp, new String[]{src}, pos); pos++; } else { assert pos == 0; @@ -8747,6 +8748,8 @@ private Operator genLateralViewPlan(QB qb, Operator op, ASTNode lateralViewTree) Operator allPath = putOpInsertMap(OperatorFactory.getAndMakeChild( new SelectDesc(true), new RowSchema(allPathRR.getColumnInfos()), lvForward), allPathRR); + int allColumns = allPathRR.getColumnInfos().size(); + // Get the UDTF Path QB blankQb = new QB(null, null, false); Operator udtfPath = genSelectPlan((ASTNode) lateralViewTree @@ -8766,26 +8769,17 @@ private Operator genLateralViewPlan(QB qb, Operator op, ASTNode lateralViewTree) RowResolver lateralViewRR = new RowResolver(); ArrayList outputInternalColNames = new ArrayList(); - LVmergeRowResolvers(allPathRR, lateralViewRR, outputInternalColNames); - LVmergeRowResolvers(udtfPathRR, lateralViewRR, outputInternalColNames); - // For PPD, we need a column to expression map so that during the walk, // the processor knows how to transform the internal col names. // Following steps are dependant on the fact that we called // LVmerge.. 
in the above order Map colExprMap = new HashMap(); - int i = 0; - for (ColumnInfo c : allPathRR.getColumnInfos()) { - String internalName = getColumnInternalName(i); - i++; - colExprMap.put(internalName, - new ExprNodeColumnDesc(c.getType(), c.getInternalName(), - c.getTabAlias(), c.getIsVirtualCol())); - } + LVmergeRowResolvers(allPathRR, lateralViewRR, colExprMap, outputInternalColNames); + LVmergeRowResolvers(udtfPathRR, lateralViewRR, colExprMap, outputInternalColNames); Operator lateralViewJoin = putOpInsertMap(OperatorFactory - .getAndMakeChild(new LateralViewJoinDesc(outputInternalColNames), + .getAndMakeChild(new LateralViewJoinDesc(allColumns, outputInternalColNames), new RowSchema(lateralViewRR.getColumnInfos()), allPath, udtfPath), lateralViewRR); lateralViewJoin.setColumnExprMap(colExprMap); @@ -8808,7 +8802,7 @@ private Operator genLateralViewPlan(QB qb, Operator op, ASTNode lateralViewTree) * the same order as in the dest row resolver */ private void LVmergeRowResolvers(RowResolver source, RowResolver dest, - ArrayList outputInternalColNames) { + Map colExprMap, ArrayList outputInternalColNames) { for (ColumnInfo c : source.getColumnInfos()) { String internalName = getColumnInternalName(outputInternalColNames.size()); outputInternalColNames.add(internalName); @@ -8818,6 +8812,8 @@ private void LVmergeRowResolvers(RowResolver source, RowResolver dest, String tableAlias = tableCol[0]; String colAlias = tableCol[1]; dest.put(tableAlias, colAlias, newCol); + colExprMap.put(internalName, new ExprNodeColumnDesc(c.getType(), c.getInternalName(), + c.getTabAlias(), c.getIsVirtualCol())); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/LateralViewJoinDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/LateralViewJoinDesc.java index ebfcfc8..4c0c978 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/LateralViewJoinDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/LateralViewJoinDesc.java @@ -29,12 +29,14 @@ public class LateralViewJoinDesc 
extends AbstractOperatorDesc { private static final long serialVersionUID = 1L; + private int numSelColumns; private ArrayList outputInternalColNames; public LateralViewJoinDesc() { } - public LateralViewJoinDesc(ArrayList outputInternalColNames) { + public LateralViewJoinDesc(int numSelColumns, ArrayList outputInternalColNames) { + this.numSelColumns = numSelColumns; this.outputInternalColNames = outputInternalColNames; } @@ -46,4 +48,12 @@ public void setOutputInternalColNames(ArrayList outputInternalColNames) public ArrayList getOutputInternalColNames() { return outputInternalColNames; } + + public int getNumSelColumns() { + return numSelColumns; + } + + public void setNumSelColumns(int numSelColumns) { + this.numSelColumns = numSelColumns; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java index 6a3dd99..f7a3f1c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java +++ ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java @@ -19,9 +19,11 @@ import java.util.ArrayList; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; +import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -289,6 +291,21 @@ public void addNonFinalCandidate(ExprNodeDesc expr) { return nonFinalPreds; } + public Map> getResidualPredicates(boolean clear) { + Map> oldExprs = new HashMap>(); + for (Map.Entry> entry : nonFinalPreds.entrySet()) { + List converted = new ArrayList(); + for (ExprNodeDesc newExpr : entry.getValue()) { + converted.add(newToOldExprMap.get(newExpr)); + } + oldExprs.put(entry.getKey(), converted); + } + if (clear) { + nonFinalPreds.clear(); + } + return oldExprs; + } + /** * Merges the specified pushdown predicates with the current class. 
* diff --git ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java index 40298e1..a6e7088 100644 --- ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java @@ -34,6 +34,7 @@ import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.exec.FilterOperator; import org.apache.hadoop.hive.ql.exec.JoinOperator; +import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; @@ -110,10 +111,17 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, @Override public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { - LOG.info("Processing for " + nd.getName() + "(" - + ((Operator) nd).getIdentifier() + ")"); - //Predicates for UDTF wont be candidates for its children. So, nothing to - //optimize here. See lateral_view_ppd.q for example. 
+ super.process(nd, stack, procCtx, nodeOutputs); + OpWalkerInfo owi = (OpWalkerInfo) procCtx; + ExprWalkerInfo prunedPred = owi.getPrunedPreds((Operator) nd); + if (prunedPred == null) { + return null; + } + Map> candidates = prunedPred.getFinalCandidates(); + if (candidates != null && !candidates.isEmpty()) { + createFilter((Operator)nd, prunedPred, owi); + candidates.clear(); + } return null; } @@ -155,7 +163,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + ((Operator) nd).getIdentifier() + ")"); OpWalkerInfo owi = (OpWalkerInfo) procCtx; TableScanOperator tsOp = (TableScanOperator) nd; - mergeWithChildrenPred(tsOp, owi, null, null, false); + mergeWithChildrenPred(tsOp, owi, null, null); ExprWalkerInfo pushDownPreds = owi.getPrunedPreds(tsOp); return createFilter(tsOp, pushDownPreds, owi); } @@ -204,7 +212,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, owi.putPrunedPreds((Operator) nd, ewi); } // merge it with children predicates - boolean hasUnpushedPredicates = mergeWithChildrenPred(nd, owi, ewi, null, false); + boolean hasUnpushedPredicates = mergeWithChildrenPred(nd, owi, ewi, null); if (HiveConf.getBoolVar(owi.getParseContext().getConf(), HiveConf.ConfVars.HIVEPPDREMOVEDUPLICATEFILTERS)) { if (hasUnpushedPredicates) { @@ -220,20 +228,18 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, * Determines predicates for which alias can be pushed to it's parents. See * the comments for getQualifiedAliases function. */ - public static class JoinPPD extends DefaultPPD implements NodeProcessor { + public static class JoinerPPD extends DefaultPPD implements NodeProcessor { @Override public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... 
nodeOutputs) throws SemanticException { LOG.info("Processing for " + nd.getName() + "(" + ((Operator) nd).getIdentifier() + ")"); OpWalkerInfo owi = (OpWalkerInfo) procCtx; - Set aliases = getQualifiedAliases((JoinOperator) nd, owi - .getRowResolver(nd)); + Set aliases = getAliases(nd, owi); // we pass null for aliases here because mergeWithChildrenPred filters // aliases in the children node context and we need to filter them in // the current JoinOperator's context - boolean hasUnpushedPredicates = - mergeWithChildrenPred(nd, owi, null, null, false); + mergeWithChildrenPred(nd, owi, null, null); ExprWalkerInfo prunePreds = owi.getPrunedPreds((Operator) nd); if (prunePreds != null) { @@ -255,31 +261,42 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, } prunePreds.getFinalCandidates().remove(alias); } - if (HiveConf.getBoolVar(owi.getParseContext().getConf(), - HiveConf.ConfVars.HIVEPPDRECOGNIZETRANSITIVITY)) { - applyFilterTransitivity((JoinOperator) nd, owi); - } - if (HiveConf.getBoolVar(owi.getParseContext().getConf(), - HiveConf.ConfVars.HIVEPPDREMOVEDUPLICATEFILTERS)) { - // Here, we add all the "non-final candidiates", ie. 
the predicates - // rejected from pushdown through this operator to unpushedPreds - // and pass it to createFilter - ExprWalkerInfo unpushedPreds = new ExprWalkerInfo(); - for (Entry> entry : - prunePreds.getNonFinalCandidates().entrySet()) { - for (ExprNodeDesc expr : entry.getValue()) { - assert prunePreds.getNewToOldExprMap().containsKey(expr); - ExprNodeDesc oldExpr = prunePreds.getNewToOldExprMap().get(expr); - unpushedPreds.addAlias(oldExpr, entry.getKey()); - unpushedPreds.addFinalCandidate(oldExpr); - } - } - return createFilter((Operator)nd, unpushedPreds, owi); - } + return handlePredicates(nd, prunePreds, owi); } return null; } + protected Set getAliases(Node nd, OpWalkerInfo owi) throws SemanticException { + return owi.getRowResolver(nd).getTableNames(); + } + + protected Object handlePredicates(Node nd, ExprWalkerInfo prunePreds, OpWalkerInfo owi) + throws SemanticException { + if (HiveConf.getBoolVar(owi.getParseContext().getConf(), + HiveConf.ConfVars.HIVEPPDREMOVEDUPLICATEFILTERS)) { + return createFilter((Operator)nd, prunePreds.getResidualPredicates(true), owi); + } + return null; + } + } + + public static class JoinPPD extends JoinerPPD { + + @Override + protected Set getAliases(Node nd, OpWalkerInfo owi) { + return getQualifiedAliases((JoinOperator) nd, owi.getRowResolver(nd)); + } + + @Override + protected Object handlePredicates(Node nd, ExprWalkerInfo prunePreds, OpWalkerInfo owi) + throws SemanticException { + if (HiveConf.getBoolVar(owi.getParseContext().getConf(), + HiveConf.ConfVars.HIVEPPDRECOGNIZETRANSITIVITY)) { + applyFilterTransitivity((JoinOperator) nd, owi); + } + return super.handlePredicates(nd, prunePreds, owi); + } + /** * Adds additional pushdown predicates for a join operator by replicating * filters transitively over all the equijoin conditions. @@ -495,66 +512,56 @@ private void replaceColumnReference(ExprNodeDesc expr, } /** - * Processor for ReduceSink operator. - * + * Default processor which just merges its children. 
*/ - public static class ReduceSinkPPD extends DefaultPPD implements NodeProcessor { + public static class DefaultPPD implements NodeProcessor { + @Override public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { LOG.info("Processing for " + nd.getName() + "(" + ((Operator) nd).getIdentifier() + ")"); - ReduceSinkOperator rs = (ReduceSinkOperator) nd; OpWalkerInfo owi = (OpWalkerInfo) procCtx; - Set aliases; - boolean ignoreAliases = false; - if (rs.getInputAlias() != null) { - aliases = new HashSet(Arrays.asList(rs.getInputAlias())); - } else { - aliases = owi.getRowResolver(nd).getTableNames(); - if (aliases.size() == 1 && aliases.contains("")) { - // Reduce sink of group by operator - ignoreAliases = true; - } - } - boolean hasUnpushedPredicates = mergeWithChildrenPred(nd, owi, null, aliases, ignoreAliases); - if (HiveConf.getBoolVar(owi.getParseContext().getConf(), + Set includes = getQualifiedAliases((Operator) nd, owi); + boolean hasUnpushedPredicates = mergeWithChildrenPred(nd, owi, null, includes); + if (hasUnpushedPredicates && HiveConf.getBoolVar(owi.getParseContext().getConf(), HiveConf.ConfVars.HIVEPPDREMOVEDUPLICATEFILTERS)) { - if (hasUnpushedPredicates) { - Operator op = - (Operator) nd; - Operator childOperator = op.getChildOperators().get(0); - if(childOperator.getParentOperators().size()==1) { - owi.getCandidateFilterOps().clear(); + if (includes != null || nd instanceof ReduceSinkOperator) { + owi.getCandidateFilterOps().clear(); + } else { + ExprWalkerInfo pruned = owi.getPrunedPreds((Operator) nd); + Map> residual = pruned.getResidualPredicates(true); + if (residual != null && !residual.isEmpty()) { + createFilter((Operator) nd, residual, owi); + pruned.getNonFinalCandidates().clear(); } } } return null; } - } - - /** - * Default processor which just merges its children. 
- */ - public static class DefaultPPD implements NodeProcessor { - - @Override - public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, - Object... nodeOutputs) throws SemanticException { - LOG.info("Processing for " + nd.getName() + "(" - + ((Operator) nd).getIdentifier() + ")"); - OpWalkerInfo owi = (OpWalkerInfo) procCtx; - boolean hasUnpushedPredicates = mergeWithChildrenPred(nd, owi, null, null, false); - if (HiveConf.getBoolVar(owi.getParseContext().getConf(), - HiveConf.ConfVars.HIVEPPDREMOVEDUPLICATEFILTERS)) { - if (hasUnpushedPredicates) { - ExprWalkerInfo unpushedPreds = mergeChildrenPred(nd, owi, null, false); - return createFilter((Operator)nd, unpushedPreds, owi); - } + // RS for join, SEL(*) for lateral view + // SEL for union does not count (should be copied to both sides) + private Set getQualifiedAliases(Operator operator, OpWalkerInfo owi) { + if (operator.getNumChild() != 1) { + return null; } - return null; + Operator child = operator.getChildOperators().get(0); + if (!(child instanceof JoinOperator || child instanceof LateralViewJoinOperator)) { + return null; + } + if (operator instanceof ReduceSinkOperator && + ((ReduceSinkOperator)operator).getInputAliases() != null) { + String[] aliases = ((ReduceSinkOperator)operator).getInputAliases(); + return new HashSet(Arrays.asList(aliases)); + } + Set includes = owi.getRowResolver(operator).getTableNames(); + if (includes.size() == 1 && includes.contains("")) { + // Reduce sink of group by operator + return null; + } + return includes; } /** @@ -585,12 +592,10 @@ protected void logExpr(Node nd, ExprWalkerInfo ewi) { * @param aliases * aliases that this operator can pushdown. 
null means that all * aliases can be pushed down - * @param ignoreAliases * @throws SemanticException */ protected boolean mergeWithChildrenPred(Node nd, OpWalkerInfo owi, - ExprWalkerInfo ewi, Set aliases, boolean ignoreAliases) - throws SemanticException { + ExprWalkerInfo ewi, Set aliases) throws SemanticException { boolean hasUnpushedPredicates = false; Operator current = (Operator) nd; List> children = current.getChildOperators(); @@ -616,8 +621,7 @@ protected boolean mergeWithChildrenPred(Node nd, OpWalkerInfo owi, } for (Entry> e : childPreds .getFinalCandidates().entrySet()) { - if (ignoreAliases || aliases == null || aliases.contains(e.getKey()) - || e.getKey() == null) { + if (aliases == null || e.getKey() == null || aliases.contains(e.getKey())) { // e.getKey() (alias) can be null in case of constant expressions. see // input8.q ExprWalkerInfo extractPushdownPreds = ExprWalkerProcFactory @@ -627,8 +631,6 @@ protected boolean mergeWithChildrenPred(Node nd, OpWalkerInfo owi, } ewi.merge(extractPushdownPreds); logExpr(nd, extractPushdownPreds); - } else { - hasUnpushedPredicates = true; } } owi.putPrunedPreds((Operator) nd, ewi); @@ -678,13 +680,16 @@ protected static Object createFilter(Operator op, || pushDownPreds.getFinalCandidates().size() == 0) { return null; } + return createFilter(op, pushDownPreds.getFinalCandidates(), owi); + } + protected static Object createFilter(Operator op, + Map> predicates, OpWalkerInfo owi) { RowResolver inputRR = owi.getRowResolver(op); // combine all predicates into a single expression List preds = new ArrayList(); - Iterator> iterator = pushDownPreds.getFinalCandidates() - .values().iterator(); + Iterator> iterator = predicates.values().iterator(); while (iterator.hasNext()) { for (ExprNodeDesc pred : iterator.next()) { preds = ExprNodeDescUtils.split(pred, preds); @@ -847,10 +852,6 @@ public static NodeProcessor getJoinProc() { return new JoinPPD(); } - public static NodeProcessor getRSProc() { - return new 
ReduceSinkPPD(); - } - public static NodeProcessor getTSProc() { return new TableScanPPD(); } @@ -871,12 +872,16 @@ public static NodeProcessor getLIMProc() { return new ScriptPPD(); } + public static NodeProcessor getLVFProc() { + return new LateralViewForwardPPD(); + } + public static NodeProcessor getUDTFProc() { return new UDTFPPD(); } - public static NodeProcessor getLVFProc() { - return new LateralViewForwardPPD(); + public static NodeProcessor getLVJProc() { + return new JoinerPPD(); } private OpProcFactory() { diff --git ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java index cd5ae51..b99f679 100644 --- ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java +++ ql/src/java/org/apache/hadoop/hive/ql/ppd/PredicatePushDown.java @@ -24,9 +24,9 @@ import org.apache.hadoop.hive.ql.exec.CommonJoinOperator; import org.apache.hadoop.hive.ql.exec.FilterOperator; import org.apache.hadoop.hive.ql.exec.LateralViewForwardOperator; +import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator; import org.apache.hadoop.hive.ql.exec.LimitOperator; import org.apache.hadoop.hive.ql.exec.PTFOperator; -import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; import org.apache.hadoop.hive.ql.exec.ScriptOperator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.UDTFOperator; @@ -97,12 +97,9 @@ public ParseContext transform(ParseContext pctx) throws SemanticException { CommonJoinOperator.getOperatorName() + "%"), OpProcFactory.getJoinProc()); opRules.put(new RuleRegExp("R4", - ReduceSinkOperator.getOperatorName() + "%"), - OpProcFactory.getRSProc()); - opRules.put(new RuleRegExp("R5", TableScanOperator.getOperatorName() + "%"), OpProcFactory.getTSProc()); - opRules.put(new RuleRegExp("R6", + opRules.put(new RuleRegExp("R5", ScriptOperator.getOperatorName() + "%"), OpProcFactory.getSCRProc()); opRules.put(new RuleRegExp("R6", @@ -114,6 +111,9 @@ 
public ParseContext transform(ParseContext pctx) throws SemanticException { opRules.put(new RuleRegExp("R8", LateralViewForwardOperator.getOperatorName() + "%"), OpProcFactory.getLVFProc()); + opRules.put(new RuleRegExp("R9", + LateralViewJoinOperator.getOperatorName() + "%"), + OpProcFactory.getLVJProc()); // The dispatcher fires the processor corresponding to the closest matching // rule and passes the context along diff --git ql/src/test/queries/clientpositive/lateral_view_ppd.q ql/src/test/queries/clientpositive/lateral_view_ppd.q index 7be86a6..65ae518 100644 --- ql/src/test/queries/clientpositive/lateral_view_ppd.q +++ ql/src/test/queries/clientpositive/lateral_view_ppd.q @@ -11,3 +11,7 @@ SELECT value, myCol FROM (SELECT * FROM srcpart LATERAL VIEW explode(array(1,2,3 EXPLAIN SELECT value, myCol FROM (SELECT * FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol LATERAL VIEW explode(array(1,2,3)) myTable2 AS myCol2) a WHERE key='0'; SELECT value, myCol FROM (SELECT * FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol LATERAL VIEW explode(array(1,2,3)) myTable2 AS myCol2) a WHERE key='0'; + +-- HIVE-4293 Predicates following UDTF operator are removed by PPD +EXPLAIN SELECT value, myCol FROM (SELECT * FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol WHERE myCol > 1) a WHERE key='0'; +SELECT value, myCol FROM (SELECT * FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol WHERE myCol > 1) a WHERE key='0'; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/ppd_udtf.q ql/src/test/queries/clientpositive/ppd_udtf.q new file mode 100644 index 0000000..d90532c --- /dev/null +++ ql/src/test/queries/clientpositive/ppd_udtf.q @@ -0,0 +1,12 @@ +explain +SELECT value from ( + select explode(array(key, value)) as (value) from ( + select * FROM src WHERE key > 400 + ) A +) B WHERE value < 450; + +SELECT value from ( + select explode(array(key, value)) as (value) from ( + select * FROM src WHERE key > 400 + ) A +) B 
WHERE value < 450; diff --git ql/src/test/results/clientpositive/cluster.q.out ql/src/test/results/clientpositive/cluster.q.out index 8d14a1d..e367805 100644 --- ql/src/test/results/clientpositive/cluster.q.out +++ ql/src/test/results/clientpositive/cluster.q.out @@ -494,47 +494,47 @@ STAGE PLANS: y:x TableScan alias: x - Select Operator - expressions: - expr: key - type: string - expr: value - type: string - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: - expr: _col0 - type: string - sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col0 + Filter Operator + predicate: + expr: (key = 20) + type: boolean + Select Operator + expressions: + expr: key type: string - expr: _col1 + expr: value type: string + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string Reduce Operator Tree: Extract - Filter Operator - predicate: - expr: (_col0 = 20) - type: boolean - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git 
ql/src/test/results/clientpositive/ctas_colname.q.out ql/src/test/results/clientpositive/ctas_colname.q.out index a15b698..155b026 100644 --- ql/src/test/results/clientpositive/ctas_colname.q.out +++ ql/src/test/results/clientpositive/ctas_colname.q.out @@ -1267,7 +1267,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Filter Operator predicate: - expr: ((_col0 < 9) and _col1 is not null) + expr: _col1 is not null type: boolean Select Operator expressions: diff --git ql/src/test/results/clientpositive/lateral_view_ppd.q.out ql/src/test/results/clientpositive/lateral_view_ppd.q.out index f54c809..c252d0b 100644 --- ql/src/test/results/clientpositive/lateral_view_ppd.q.out +++ ql/src/test/results/clientpositive/lateral_view_ppd.q.out @@ -112,31 +112,25 @@ STAGE PLANS: Lateral View Forward Select Operator expressions: - expr: key - type: string expr: value type: string - outputColumnNames: key, value + outputColumnNames: value Lateral View Join Operator - outputColumnNames: _col0, _col1, _col4 - Filter Operator - predicate: - expr: ((_col0 = '0') and (_col4 = 1)) - type: boolean - Select Operator - expressions: - expr: _col1 - type: string - expr: _col4 - type: int - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col1, _col4 + Select Operator + expressions: + expr: _col1 + type: string + expr: _col4 + type: int + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Select Operator expressions: expr: array(1,2,3) @@ -144,12 +138,12 @@ STAGE PLANS: 
outputColumnNames: _col0 UDTF Operator function name: explode - Lateral View Join Operator - outputColumnNames: _col0, _col1, _col4 - Filter Operator - predicate: - expr: ((_col0 = '0') and (_col4 = 1)) - type: boolean + Filter Operator + predicate: + expr: (col = 1) + type: boolean + Lateral View Join Operator + outputColumnNames: _col1, _col4 Select Operator expressions: expr: _col1 @@ -445,3 +439,96 @@ val_0 2 val_0 3 val_0 3 val_0 3 +PREHOOK: query: -- HIVE-4293 Predicates following UDTF operator are removed by PPD +EXPLAIN SELECT value, myCol FROM (SELECT * FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol WHERE myCol > 1) a WHERE key='0' +PREHOOK: type: QUERY +POSTHOOK: query: -- HIVE-4293 Predicates following UDTF operator are removed by PPD +EXPLAIN SELECT value, myCol FROM (SELECT * FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol WHERE myCol > 1) a WHERE key='0' +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION array 1 2 3)) myCol (TOK_TABALIAS myTable))) (TOK_TABREF (TOK_TABNAME src)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (> (TOK_TABLE_OR_COL myCol) 1)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_TABLE_OR_COL myCol))) (TOK_WHERE (= (TOK_TABLE_OR_COL key) '0')))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a:src + TableScan + alias: src + Filter Operator + predicate: + expr: (key = '0') + type: boolean + Lateral View Forward + Select Operator + expressions: + expr: value + type: string + outputColumnNames: value + Lateral View Join Operator + outputColumnNames: _col1, _col4 + Select Operator + expressions: + expr: _col1 + type: string + expr: 
_col4 + type: int + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Select Operator + expressions: + expr: array(1,2,3) + type: array + outputColumnNames: _col0 + UDTF Operator + function name: explode + Filter Operator + predicate: + expr: (col > 1) + type: boolean + Lateral View Join Operator + outputColumnNames: _col1, _col4 + Select Operator + expressions: + expr: _col1 + type: string + expr: _col4 + type: int + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT value, myCol FROM (SELECT * FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol WHERE myCol > 1) a WHERE key='0' +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT value, myCol FROM (SELECT * FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol WHERE myCol > 1) a WHERE key='0' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +val_0 2 +val_0 3 +val_0 2 +val_0 3 +val_0 2 +val_0 3 diff --git ql/src/test/results/clientpositive/ppd2.q.out ql/src/test/results/clientpositive/ppd2.q.out index f6af8f8..c8b7bdb 100644 --- ql/src/test/results/clientpositive/ppd2.q.out +++ ql/src/test/results/clientpositive/ppd2.q.out @@ -80,31 +80,24 @@ STAGE PLANS: type: string mode: mergepartial outputColumnNames: _col0, _col1 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - outputColumnNames: _col0, _col1 - 
Filter Operator - predicate: - expr: (_col1 > 1) - type: boolean - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: bigint - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Filter Operator + predicate: + expr: (_col1 > 1) + type: boolean + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/ppd_gby.q.out ql/src/test/results/clientpositive/ppd_gby.q.out index 5908450..444e8cf 100644 --- ql/src/test/results/clientpositive/ppd_gby.q.out +++ ql/src/test/results/clientpositive/ppd_gby.q.out @@ -307,7 +307,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Filter Operator predicate: - expr: ((_col0 > 'val_200') and ((_col1 > 30) or (_col0 < 'val_400'))) + expr: ((_col1 > 30) or (_col0 < 'val_400')) type: boolean Select Operator expressions: diff --git ql/src/test/results/clientpositive/ppd_gby2.q.out ql/src/test/results/clientpositive/ppd_gby2.q.out index bdd7e89..98c027c 100644 --- ql/src/test/results/clientpositive/ppd_gby2.q.out +++ ql/src/test/results/clientpositive/ppd_gby2.q.out @@ -244,7 +244,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Filter Operator predicate: - expr: ((_col0 > 'val_200') and ((_col1 > 30) or (_col0 < 'val_400'))) + expr: ((_col1 > 30) or (_col0 < 'val_400')) type: boolean Select Operator expressions: diff --git 
ql/src/test/results/clientpositive/ppd_udtf.q.out ql/src/test/results/clientpositive/ppd_udtf.q.out new file mode 100644 index 0000000..67a0564 --- /dev/null +++ ql/src/test/results/clientpositive/ppd_udtf.q.out @@ -0,0 +1,132 @@ +PREHOOK: query: explain +SELECT value from ( + select explode(array(key, value)) as (value) from ( + select * FROM src WHERE key > 400 + ) A +) B WHERE value < 450 +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT value from ( + select explode(array(key, value)) as (value) from ( + select * FROM src WHERE key > 400 + ) A +) B WHERE value < 450 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (> (TOK_TABLE_OR_COL key) 400)))) A)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION array (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value))) value)))) B)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value))) (TOK_WHERE (< (TOK_TABLE_OR_COL value) 450)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + b:a:src + TableScan + alias: src + Filter Operator + predicate: + expr: (key > 400) + type: boolean + Select Operator + expressions: + expr: array(key,value) + type: array + outputColumnNames: _col0 + UDTF Operator + function name: explode + Filter Operator + predicate: + expr: (col < 450) + type: boolean + Select Operator + expressions: + expr: col + type: string + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT value from ( + select explode(array(key, value)) as (value) from ( + select * FROM src WHERE key > 400 + ) A +) B WHERE value < 450 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT value from ( + select explode(array(key, value)) as (value) from ( + select * FROM src WHERE key > 400 + ) A +) B WHERE value < 450 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +409 +401 +406 +429 +403 +417 +430 +446 +413 +417 +439 +438 +427 +437 +430 +404 +411 +431 +418 +404 +436 +409 +413 +419 +435 +401 +402 +430 +449 +401 +438 +432 +403 +406 +409 +406 +401 +424 +431 +431 +424 +438 +414 +439 +417 +444 +429 +443 +406 +401 +421 +407 +448 +414 +403 diff --git ql/src/test/results/clientpositive/udtf_json_tuple.q.out ql/src/test/results/clientpositive/udtf_json_tuple.q.out index 1a480b6..21d273a 100644 --- ql/src/test/results/clientpositive/udtf_json_tuple.q.out +++ ql/src/test/results/clientpositive/udtf_json_tuple.q.out @@ -426,36 +426,32 @@ STAGE PLANS: Select Operator Lateral View Join Operator outputColumnNames: _col4, _col5, _col6, _col7, _col8 - Filter Operator - predicate: - expr: _col4 is not null - type: boolean - Select Operator - expressions: + Select Operator + expressions: + expr: _col5 + type: string + outputColumnNames: _col5 + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: expr: _col5 type: string - outputColumnNames: _col5 - Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col5 + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 type: string - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: - expr: _col0 - type: string - sort order: + - Map-reduce partition columns: - expr: _col0 
- type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint Select Operator expressions: expr: jstring @@ -473,12 +469,12 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 UDTF Operator function name: json_tuple - Lateral View Join Operator - outputColumnNames: _col4, _col5, _col6, _col7, _col8 - Filter Operator - predicate: - expr: _col4 is not null - type: boolean + Filter Operator + predicate: + expr: c0 is not null + type: boolean + Lateral View Join Operator + outputColumnNames: _col4, _col5, _col6, _col7, _col8 Select Operator expressions: expr: _col5 diff --git ql/src/test/results/clientpositive/udtf_parse_url_tuple.q.out ql/src/test/results/clientpositive/udtf_parse_url_tuple.q.out index a38b31b..7d5961a 100644 --- ql/src/test/results/clientpositive/udtf_parse_url_tuple.q.out +++ ql/src/test/results/clientpositive/udtf_parse_url_tuple.q.out @@ -550,36 +550,32 @@ STAGE PLANS: Select Operator Lateral View Join Operator outputColumnNames: _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Filter Operator - predicate: - expr: _col12 is not null - type: boolean - Select Operator - expressions: + Select Operator + expressions: + expr: _col4 + type: string + outputColumnNames: _col4 + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: expr: _col4 type: string - outputColumnNames: _col4 - Group By Operator - aggregations: - expr: count() - bucketGroup: false - keys: - expr: _col4 + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 type: string - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: - expr: _col0 - type: string - sort order: + - Map-reduce partition columns: - expr: _col0 - type: string - tag: -1 - value expressions: - expr: _col1 - type: bigint 
+ sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint Select Operator expressions: expr: fullurl @@ -605,12 +601,12 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 UDTF Operator function name: parse_url_tuple - Lateral View Join Operator - outputColumnNames: _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Filter Operator - predicate: - expr: _col12 is not null - type: boolean + Filter Operator + predicate: + expr: c8 is not null + type: boolean + Lateral View Join Operator + outputColumnNames: _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Select Operator expressions: expr: _col4 diff --git ql/src/test/results/compiler/plan/join1.q.xml ql/src/test/results/compiler/plan/join1.q.xml index 9e4b609..91d2061 100644 --- ql/src/test/results/compiler/plan/join1.q.xml +++ ql/src/test/results/compiler/plan/join1.q.xml @@ -595,9 +595,6 @@ - - src2 - RS_3 @@ -908,9 +905,6 @@ - - src1 - RS_2 diff --git ql/src/test/results/compiler/plan/join2.q.xml ql/src/test/results/compiler/plan/join2.q.xml index efcb865..174e719 100644 --- ql/src/test/results/compiler/plan/join2.q.xml +++ ql/src/test/results/compiler/plan/join2.q.xml @@ -753,9 +753,6 @@ - - src3 - RS_7 @@ -2045,9 +2042,6 @@ - - src2 - RS_4 @@ -2332,9 +2326,6 @@ - - src1 - RS_3 diff --git ql/src/test/results/compiler/plan/join3.q.xml ql/src/test/results/compiler/plan/join3.q.xml index 9bbe64f..2b2d3fb 100644 --- ql/src/test/results/compiler/plan/join3.q.xml +++ ql/src/test/results/compiler/plan/join3.q.xml @@ -709,9 +709,6 @@ - - src2 - RS_4 @@ -1017,9 +1014,6 @@ - - src3 - RS_5 @@ -1326,9 +1320,6 @@ - - src1 - RS_3 diff --git ql/src/test/results/compiler/plan/join4.q.xml ql/src/test/results/compiler/plan/join4.q.xml index f8a8f10..c019ef2 100644 --- ql/src/test/results/compiler/plan/join4.q.xml +++ ql/src/test/results/compiler/plan/join4.q.xml @@ 
-447,9 +447,6 @@ - - a - RS_6 @@ -1014,9 +1011,6 @@ - - b - RS_7 diff --git ql/src/test/results/compiler/plan/join5.q.xml ql/src/test/results/compiler/plan/join5.q.xml index d43ce00..cece20e 100644 --- ql/src/test/results/compiler/plan/join5.q.xml +++ ql/src/test/results/compiler/plan/join5.q.xml @@ -447,9 +447,6 @@ - - a - RS_6 @@ -1014,9 +1011,6 @@ - - b - RS_7 diff --git ql/src/test/results/compiler/plan/join6.q.xml ql/src/test/results/compiler/plan/join6.q.xml index a354e4b..60a7c02 100644 --- ql/src/test/results/compiler/plan/join6.q.xml +++ ql/src/test/results/compiler/plan/join6.q.xml @@ -447,9 +447,6 @@ - - a - RS_6 @@ -1014,9 +1011,6 @@ - - b - RS_7 diff --git ql/src/test/results/compiler/plan/join7.q.xml ql/src/test/results/compiler/plan/join7.q.xml index ac41995..16a008e 100644 --- ql/src/test/results/compiler/plan/join7.q.xml +++ ql/src/test/results/compiler/plan/join7.q.xml @@ -583,9 +583,6 @@ - - a - RS_9 @@ -1150,9 +1147,6 @@ - - b - RS_10 @@ -1705,9 +1699,6 @@ - - c - RS_11 diff --git ql/src/test/results/compiler/plan/join8.q.xml ql/src/test/results/compiler/plan/join8.q.xml index 569cf6e..c71564a 100644 --- ql/src/test/results/compiler/plan/join8.q.xml +++ ql/src/test/results/compiler/plan/join8.q.xml @@ -447,9 +447,6 @@ - - a - RS_6 @@ -1055,9 +1052,6 @@ - - b - RS_7