diff --git ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
index 6651900..4af09f0 100644
--- ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
+++ ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
@@ -468,7 +468,7 @@
       "requires \"AND <boolean>\" on the 1st WHEN MATCHED clause of <{0}>", true),
   MERGE_TOO_MANY_DELETE(10405, "MERGE statment can have at most 1 WHEN MATCHED ... DELETE clause: <{0}>", true),
   MERGE_TOO_MANY_UPDATE(10406, "MERGE statment can have at most 1 WHEN MATCHED ... UPDATE clause: <{0}>", true),
-  INVALID_JOIN_CONDITION(10407, "Error parsing condition in outer join"),
+  INVALID_JOIN_CONDITION(10407, "Error parsing condition in join"),
   INVALID_TARGET_COLUMN_IN_SET_CLAUSE(10408, "Target column \"{0}\" of set clause is not found in table \"{1}\".", true),
   HIVE_GROUPING_FUNCTION_EXPR_NOT_IN_GROUPBY(10409, "Expression in GROUPING function not present in GROUP BY"),
   //========================== 20000 range starts here ========================//
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java
index 07fd653..3573d07 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java
@@ -361,13 +361,6 @@ protected void initializeOp(Configuration hconf) throws HiveException {
 
     // Create post-filtering evaluators if needed
     if (conf.getResidualFilterExprs() != null) {
-      // Currently residual filter expressions are only used with outer joins, thus
-      // we add this safeguard.
-      // TODO: Remove this guard when support for residual expressions can be present
-      // for inner joins too. This would be added to improve efficiency in the evaluation
-      // of certain joins, since we will not be emitting rows which are thrown away by
-      // filter straight away.
-      assert !noOuterJoin;
       residualJoinFilters = new ArrayList<>(conf.getResidualFilterExprs().size());
       residualJoinFiltersOIs = new ArrayList<>(conf.getResidualFilterExprs().size());
       for (int i = 0; i < conf.getResidualFilterExprs().size(); i++) {
@@ -377,10 +370,12 @@ protected void initializeOp(Configuration hconf) throws HiveException {
             residualJoinFilters.get(i).initialize(outputObjInspector));
       }
       needsPostEvaluation = true;
-      // We need to disable join emit interval, since for outer joins with post conditions
-      // we need to have the full view on the right matching rows to know whether we need
-      // to produce a row with NULL values or not
-      joinEmitInterval = -1;
+      if (!noOuterJoin) {
+        // We need to disable join emit interval, since for outer joins with post conditions
+        // we need to have the full view on the right matching rows to know whether we need
+        // to produce a row with NULL values or not
+        joinEmitInterval = -1;
+      }
     }
 
     if (LOG.isInfoEnabled()) {
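The CommonJoinOperator hunks above are the core behavioral change: residual ON-clause predicates may now be attached to inner joins as well, and only the outer-join case still disables the join emit interval. A minimal, self-contained sketch of that evaluation pattern (illustrative names, not Hive's actual classes):

import java.util.List;
import java.util.function.Predicate;

/** Illustrative model of post-join "residual" filtering; not Hive's API. */
final class ResidualFilterSketch<Row> {
  private final List<Predicate<Row>> residualJoinFilters; // compiled from the ON clause
  private final boolean noOuterJoin;

  ResidualFilterSketch(List<Predicate<Row>> residualJoinFilters, boolean noOuterJoin) {
    this.residualJoinFilters = residualJoinFilters;
    this.noOuterJoin = noOuterJoin;
  }

  /**
   * Inner joins can keep streaming: a rejected row is simply dropped.
   * Outer joins must buffer all matches (emit interval -1) because whether a
   * NULL-padded row is emitted depends on every candidate match being seen.
   */
  int effectiveEmitInterval(int configuredInterval) {
    return noOuterJoin ? configuredInterval : -1;
  }

  /** A composite joined row is forwarded only if every residual predicate accepts it. */
  boolean accept(Row joinedRow) {
    for (Predicate<Row> filter : residualJoinFilters) {
      if (!filter.test(joinedRow)) {
        return false;
      }
    }
    return true;
  }
}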
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 737aad1..72bdc71 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -1890,7 +1890,7 @@ private boolean validateMapJoinDesc(MapJoinDesc desc) {
       return false;
     }
     if (desc.getResidualFilterExprs() != null && !desc.getResidualFilterExprs().isEmpty()) {
-      LOG.info("Cannot vectorize outer join with complex ON clause");
+      LOG.info("Cannot vectorize join with complex ON clause");
       return false;
     }
     return true;
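The Vectorizer hunk is only a message fix, but the guard it touches matters for the rest of the patch: any join that still carries residual predicates is excluded from vectorization and executes through the row-mode join operator. Condensed to its essence (hypothetical standalone form, not Hive's signature):

import java.util.List;

final class VectorizationGuardSketch {
  // Mirrors the check in validateMapJoinDesc above: residual (post-join)
  // predicates have no per-row evaluation hook in the vectorized operators,
  // so their presence forces the row-mode fallback.
  static boolean canVectorize(List<?> residualFilterExprs) {
    return residualFilterExprs == null || residualFilterExprs.isEmpty();
  }
}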
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index fc6adaf..ae32a28 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -35,6 +35,7 @@
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
 import org.apache.hadoop.hive.ql.exec.CommonJoinOperator;
 import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
 import org.apache.hadoop.hive.ql.exec.GroupByOperator;
 import org.apache.hadoop.hive.ql.exec.LimitOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
@@ -312,9 +313,9 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
       return null;
     }
 
-    private long evaluateExpression(Statistics stats, ExprNodeDesc pred,
+    protected long evaluateExpression(Statistics stats, ExprNodeDesc pred,
         AnnotateStatsProcCtx aspCtx, List<String> neededCols,
-        FilterOperator fop, long evaluatedRowCount) throws CloneNotSupportedException, SemanticException {
+        Operator op, long evaluatedRowCount) throws CloneNotSupportedException, SemanticException {
 
       long newNumRows = 0;
       Statistics andStats = null;
@@ -338,11 +339,11 @@ private long evaluateExpression(Statistics stats, ExprNodeDesc pred,
         // evaluate children
         for (ExprNodeDesc child : genFunc.getChildren()) {
           newNumRows = evaluateChildExpr(aspCtx.getAndExprStats(), child,
-              aspCtx, neededCols, fop, evaluatedRowCount);
+              aspCtx, neededCols, op, evaluatedRowCount);
           if (satisfyPrecondition(aspCtx.getAndExprStats())) {
-            updateStats(aspCtx.getAndExprStats(), newNumRows, true, fop);
+            updateStats(aspCtx.getAndExprStats(), newNumRows, true, op);
           } else {
-            updateStats(aspCtx.getAndExprStats(), newNumRows, false, fop);
+            updateStats(aspCtx.getAndExprStats(), newNumRows, false, op);
           }
         }
       } else if (udf instanceof GenericUDFOPOr) {
@@ -353,24 +354,24 @@ private long evaluateExpression(Statistics stats, ExprNodeDesc pred,
             evaluatedRowCount = stats.getNumRows();
           } else {
             newNumRows = StatsUtils.safeAdd(
-                evaluateChildExpr(stats, child, aspCtx, neededCols, fop, evaluatedRowCount),
+                evaluateChildExpr(stats, child, aspCtx, neededCols, op, evaluatedRowCount),
                 newNumRows);
             evaluatedRowCount = newNumRows;
           }
         }
       } else if (udf instanceof GenericUDFIn) {
         // for IN clause
-        newNumRows = evaluateInExpr(stats, pred, aspCtx, neededCols, fop);
+        newNumRows = evaluateInExpr(stats, pred, aspCtx, neededCols, op);
       } else if (udf instanceof GenericUDFBetween) {
         // for BETWEEN clause
-        newNumRows = evaluateBetweenExpr(stats, pred, aspCtx, neededCols, fop);
+        newNumRows = evaluateBetweenExpr(stats, pred, aspCtx, neededCols, op);
       } else if (udf instanceof GenericUDFOPNot) {
-        newNumRows = evaluateNotExpr(stats, pred, aspCtx, neededCols, fop);
+        newNumRows = evaluateNotExpr(stats, pred, aspCtx, neededCols, op);
       } else if (udf instanceof GenericUDFOPNotNull) {
         return evaluateNotNullExpr(stats, genFunc);
       } else {
         // single predicate condition
-        newNumRows = evaluateChildExpr(stats, pred, aspCtx, neededCols, fop, evaluatedRowCount);
+        newNumRows = evaluateChildExpr(stats, pred, aspCtx, neededCols, op, evaluatedRowCount);
       }
 
     } else if (pred instanceof ExprNodeColumnDesc) {
@@ -410,7 +411,7 @@ private long evaluateExpression(Statistics stats, ExprNodeDesc pred,
     }
 
     private long evaluateInExpr(Statistics stats, ExprNodeDesc pred, AnnotateStatsProcCtx aspCtx,
-        List<String> neededCols, FilterOperator fop) throws SemanticException {
+        List<String> neededCols, Operator op) throws SemanticException {
 
       long numRows = stats.getNumRows();
@@ -500,7 +501,7 @@ private long evaluateInExpr(Statistics stats, ExprNodeDesc pred, AnnotateStatsPr
     }
 
     private long evaluateBetweenExpr(Statistics stats, ExprNodeDesc pred, AnnotateStatsProcCtx aspCtx,
-        List<String> neededCols, FilterOperator fop) throws SemanticException, CloneNotSupportedException {
+        List<String> neededCols, Operator op) throws SemanticException, CloneNotSupportedException {
       final ExprNodeGenericFuncDesc fd = (ExprNodeGenericFuncDesc) pred;
       final boolean invert = Boolean.TRUE.equals(
           ((ExprNodeConstantDesc) fd.getChildren().get(0)).getValue()); // boolean invert (not)
@@ -528,11 +529,11 @@ private long evaluateBetweenExpr(Statistics stats, ExprNodeDesc pred, AnnotateSt
           new GenericUDFOPNot(), Lists.newArrayList(newExpression));
     }
 
-    return evaluateExpression(stats, newExpression, aspCtx, neededCols, fop, 0);
+    return evaluateExpression(stats, newExpression, aspCtx, neededCols, op, 0);
   }
 
   private long evaluateNotExpr(Statistics stats, ExprNodeDesc pred,
-      AnnotateStatsProcCtx aspCtx, List<String> neededCols, FilterOperator fop)
+      AnnotateStatsProcCtx aspCtx, List<String> neededCols, Operator op)
       throws CloneNotSupportedException, SemanticException {
 
     long numRows = stats.getNumRows();
@@ -547,7 +548,7 @@ private long evaluateNotExpr(Statistics stats, ExprNodeDesc pred,
         long newNumRows = 0;
         for (ExprNodeDesc child : genFunc.getChildren()) {
           newNumRows = evaluateChildExpr(stats, child, aspCtx, neededCols,
-              fop, 0);
+              op, 0);
         }
         return numRows - newNumRows;
       } else if (leaf instanceof ExprNodeConstantDesc) {
@@ -832,7 +833,7 @@ private long evaluateComparator(Statistics stats, ExprNodeGenericFuncDesc genFun
 
     private long evaluateChildExpr(Statistics stats, ExprNodeDesc child,
         AnnotateStatsProcCtx aspCtx, List<String> neededCols,
-        FilterOperator fop, long evaluatedRowCount) throws CloneNotSupportedException, SemanticException {
+        Operator op, long evaluatedRowCount) throws CloneNotSupportedException, SemanticException {
 
       long numRows = stats.getNumRows();
@@ -919,7 +920,7 @@ private long evaluateChildExpr(Statistics stats, ExprNodeDesc child,
       } else if (udf instanceof GenericUDFOPAnd || udf instanceof GenericUDFOPOr ||
           udf instanceof GenericUDFIn || udf instanceof GenericUDFBetween ||
           udf instanceof GenericUDFOPNot) {
-        return evaluateExpression(stats, genFunc, aspCtx, neededCols, fop, evaluatedRowCount);
+        return evaluateExpression(stats, genFunc, aspCtx, neededCols, op, evaluatedRowCount);
       } else if (udf instanceof GenericUDFInBloomFilter) {
         if (genFunc.getChildren().get(1) instanceof ExprNodeDynamicValueDesc) {
           // Synthetic predicates from semijoin opt should not affect stats.
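All of the fop-to-op renames above serve one refactor: the selectivity estimator evaluateExpression becomes protected and operator-agnostic, so it is no longer tied to FilterOperator. The payoff appears in the next hunk, where JoinStatsRule starts extending FilterStatsRule. A generic sketch of the reuse pattern (hypothetical types, not Hive's):

// The estimator lives in the filter rule but is protected and typed against a
// generic operator, so a join rule can inherit it rather than duplicate it.
abstract class FilterRuleSketch<Op> {
  protected long evaluateExpression(Op op, long inputRows, double selectivity) {
    // a real implementation walks the predicate tree against column stats;
    // this stand-in only scales the row count
    return Math.max(1L, Math.round(inputRows * selectivity));
  }
}

final class JoinRuleSketch<Op> extends FilterRuleSketch<Op> {
  long estimateAfterResidual(Op joinOp, long joinRows, double residualSelectivity) {
    return evaluateExpression(joinOp, joinRows, residualSelectivity);
  }
}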
@@ -1405,7 +1406,7 @@ private boolean checkMapSideAggregation(GroupByOperator gop,
    * "Database Systems: The Complete Book" by Garcia-Molina et. al.
    *
    */
-  public static class JoinStatsRule extends DefaultStatsRule implements NodeProcessor {
+  public static class JoinStatsRule extends FilterStatsRule implements NodeProcessor {
 
     @Override
@@ -1542,8 +1543,37 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
         // update join statistics
         stats.setColumnStats(outColStats);
-        long newRowCount = inferredRowCount !=-1 ? inferredRowCount : computeNewRowCount(rowCounts, denom, jop);
-        updateColStats(conf, stats, newRowCount, jop, rowCountParents);
+        long joinRowCount = inferredRowCount !=-1 ? inferredRowCount : computeNewRowCount(rowCounts, denom, jop);
+        updateColStats(conf, stats, joinRowCount, jop, rowCountParents);
+
+        // evaluate filter expression and update statistics
+        if (joinRowCount != -1 && jop.getConf().getNoOuterJoin() &&
+            jop.getConf().getResidualFilterExprs() != null &&
+            !jop.getConf().getResidualFilterExprs().isEmpty()) {
+          ExprNodeDesc pred;
+          if (jop.getConf().getResidualFilterExprs().size() > 1) {
+            pred = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
+                FunctionRegistry.getGenericUDFForAnd(),
+                jop.getConf().getResidualFilterExprs());
+          } else {
+            pred = jop.getConf().getResidualFilterExprs().get(0);
+          }
+          // evaluate filter expression and update statistics
+          try {
+            newNumRows = evaluateExpression(stats, pred,
+                aspCtx, jop.getSchema().getColumnNames(), jop, 0);
+          } catch (CloneNotSupportedException e) {
+            throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
+          }
+          // update statistics based on column statistics.
+          // OR conditions keep adding the stats independently; this may
+          // result in number of rows getting more than the input rows, in
+          // which case stats need not be updated
+          if (newNumRows <= joinRowCount) {
+            updateStats(stats, newNumRows, true, jop);
+          }
+        }
+
         jop.setStatistics(stats);
 
         if (LOG.isDebugEnabled()) {
@@ -1599,9 +1629,37 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
           newNumRows = StatsUtils.safeMult(StatsUtils.safeMult(maxRowCount, (numParents - 1)), joinFactor);
           newDataSize = StatsUtils.safeMult(StatsUtils.safeMult(maxDataSize, (numParents - 1)), joinFactor);
         }
+
         Statistics wcStats = new Statistics();
         wcStats.setNumRows(newNumRows);
         wcStats.setDataSize(newDataSize);
+
+        // evaluate filter expression and update statistics
+        if (jop.getConf().getNoOuterJoin() &&
+            jop.getConf().getResidualFilterExprs() != null &&
+            !jop.getConf().getResidualFilterExprs().isEmpty()) {
+          long joinRowCount = newNumRows;
+          ExprNodeDesc pred;
+          if (jop.getConf().getResidualFilterExprs().size() > 1) {
+            pred = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
+                FunctionRegistry.getGenericUDFForAnd(),
+                jop.getConf().getResidualFilterExprs());
+          } else {
+            pred = jop.getConf().getResidualFilterExprs().get(0);
+          }
+          // evaluate filter expression and update statistics
+          try {
+            newNumRows = evaluateExpression(wcStats, pred,
+                aspCtx, jop.getSchema().getColumnNames(), jop, 0);
+          } catch (CloneNotSupportedException e) {
+            throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
+          }
+          // update only the basic statistics in the absence of column statistics
+          if (newNumRows <= joinRowCount) {
+            updateStats(wcStats, newNumRows, false, jop);
+          }
+        }
+
         jop.setStatistics(wcStats);
 
         if (LOG.isDebugEnabled()) {
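Both JoinStatsRule hunks above follow the same recipe: fold every residual predicate into one conjunction with FunctionRegistry.getGenericUDFForAnd(), run the inherited evaluateExpression over it, and accept the estimate only when it does not exceed the join's own cardinality, because OR branches are estimated independently and can overshoot. The arithmetic, as a self-contained sketch (illustrative only: Hive derives per-predicate selectivities from column statistics rather than taking them as inputs):

import java.util.List;

final class ResidualStatsSketch {
  /** AND semantics under an independence assumption: selectivities multiply. */
  static double conjunctionSelectivity(List<Double> predicateSelectivities) {
    double s = 1.0;
    for (double sel : predicateSelectivities) {
      s *= sel;
    }
    return s;
  }

  static long estimate(long joinRowCount, List<Double> residualSelectivities) {
    if (residualSelectivities.isEmpty()) {
      return joinRowCount;
    }
    long estimated = Math.round(joinRowCount * conjunctionSelectivity(residualSelectivities));
    // mirror the "newNumRows <= joinRowCount" guard above: never report more
    // rows after residual filtering than the join itself produced
    return Math.min(estimated, joinRowCount);
  }
}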
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 9e84a29..e36c616 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -8143,11 +8143,12 @@ private Operator genJoinOperatorChildren(QBJoinTree join, Operator left,
         join.getNoOuterJoin(), joinCondns, filterMap, joinKeys, null);
     desc.setReversedExprs(reversedExprs);
     desc.setFilterMap(join.getFilterMap());
-    // For outer joins, add filters that apply to more than one input
-    if (!join.getNoOuterJoin() && join.getPostJoinFilters().size() != 0) {
+    // Add filters that apply to more than one input
+    if (join.getPostJoinFilters().size() != 0) {
+      LOG.debug("Generate JOIN with post-filtering conditions");
       List<ExprNodeDesc> residualFilterExprs = new ArrayList<ExprNodeDesc>();
       for (ASTNode cond : join.getPostJoinFilters()) {
-        residualFilterExprs.add(genExprNodeDesc(cond, outputRR));
+        residualFilterExprs.add(genExprNodeDesc(cond, outputRR, false, isCBOExecuted()));
       }
       desc.setResidualFilterExprs(residualFilterExprs);
       // Clean post-conditions
@@ -8357,15 +8358,7 @@ private Operator genJoinOperator(QB qb, QBJoinTree joinTree,
 
     if (joinTree.getPostJoinFilters().size() != 0) {
       // Safety check for postconditions
-      assert joinTree.getNoOuterJoin();
-      Operator op = joinOp;
-      for(ASTNode condn : joinTree.getPostJoinFilters()) {
-        op = genFilterPlan(qb, condn, op, false);
-        if (LOG.isDebugEnabled()) {
-          LOG.debug("Generated " + op + " with post-filtering conditions after JOIN operator");
-        }
-      }
-      return op;
+      throw new SemanticException("Post-filtering conditions should have been added to the JOIN operator");
     }
 
     return joinOp;
diff --git ql/src/test/results/clientpositive/join43.q.out ql/src/test/results/clientpositive/join43.q.out
index 24168ca..06f59cc 100644
--- ql/src/test/results/clientpositive/join43.q.out
+++ ql/src/test/results/clientpositive/join43.q.out
@@ -258,22 +258,20 @@ STAGE PLANS:
           0 _col0 (type: string)
           1 _col0 (type: string)
         outputColumnNames: _col0, _col1, _col3
-        Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE
-        Filter Operator
-          predicate: (_col1 > _col3) (type: boolean)
+        residual filter predicates: {(_col1 > _col3)}
+        Statistics: Num rows: 2 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+        Group By Operator
+          aggregations: max(_col3)
+          keys: _col0 (type: string), _col1 (type: int)
+          mode: hash
+          outputColumnNames: _col0, _col1, _col2
           Statistics: Num rows: 2 Data
size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(_col3) + keys: _col0 (type: string), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2 Data size: 13 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(_col3) - keys: _col0 (type: string), _col1 (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 13 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce diff --git ql/src/test/results/clientpositive/join45.q.out ql/src/test/results/clientpositive/join45.q.out index 3c373e6..20dce5d 100644 --- ql/src/test/results/clientpositive/join45.q.out +++ ql/src/test/results/clientpositive/join45.q.out @@ -359,20 +359,18 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 12500 Data size: 240800 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col0 = _col2) or UDFToDouble(_col1) BETWEEN 100.0 AND 102.0 or UDFToDouble(_col3) BETWEEN 100.0 AND 102.0) (type: boolean) - Statistics: Num rows: 9026 Data size: 173876 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 + residual filter predicates: {((_col0 = _col2) or UDFToDouble(_col1) BETWEEN 100.0 AND 102.0 or UDFToDouble(_col3) BETWEEN 100.0 AND 102.0)} + Statistics: Num rows: 9026 Data size: 173876 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -464,20 +462,18 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 12500 Data size: 240800 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((UDFToDouble(_col0) + UDFToDouble(_col2)) <= 102.0) and ((UDFToDouble(_col0) + UDFToDouble(_col2)) >= 100.0)) (type: boolean) - Statistics: Num rows: 1388 Data size: 26738 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 + residual filter predicates: {((UDFToDouble(_col0) + UDFToDouble(_col2)) >= 100.0)} {((UDFToDouble(_col0) + UDFToDouble(_col2)) <= 102.0)} + Statistics: Num rows: 1388 Data size: 26738 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column 
stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -563,20 +559,18 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 12500 Data size: 240800 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((UDFToDouble(_col0) + UDFToDouble(_col2)) >= 100.0) or ((UDFToDouble(_col0) + UDFToDouble(_col2)) <= 102.0)) (type: boolean) - Statistics: Num rows: 8332 Data size: 160507 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 + residual filter predicates: {(((UDFToDouble(_col0) + UDFToDouble(_col2)) >= 100.0) or ((UDFToDouble(_col0) + UDFToDouble(_col2)) <= 102.0))} + Statistics: Num rows: 8332 Data size: 160507 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -664,20 +658,18 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 12500 Data size: 240800 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (struct(_col0,_col2)) IN (const struct(100,100), const struct(101,101), const struct(102,102)) (type: boolean) - Statistics: Num rows: 3125 Data size: 60200 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 + residual filter predicates: {(struct(_col0,_col2)) IN (const struct(100,100), const struct(101,101), const struct(102,102))} + Statistics: Num rows: 3125 Data size: 60200 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -795,24 +787,22 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 13750 Data size: 264875 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((UDFToDouble(_col4) + UDFToDouble(_col0)) >= 100.0) (type: boolean) + residual filter predicates: {((UDFToDouble(_col4) + UDFToDouble(_col0)) >= 100.0)} + Statistics: Num rows: 4583 Data size: 88285 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 4583 Data size: 88285 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string), _col2 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4583 Data size: 88285 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -942,20 +932,18 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 13750 Data size: 264875 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((UDFToDouble(_col4) + UDFToDouble(_col0)) <= 102.0) (type: boolean) - Statistics: Num rows: 4583 Data size: 88285 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 + residual filter predicates: {((UDFToDouble(_col4) + UDFToDouble(_col0)) <= 102.0)} + Statistics: Num rows: 4583 Data size: 88285 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1046,16 +1034,14 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3 - 
Statistics: Num rows: 12500 Data size: 240800 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((UDFToDouble(_col2) + UDFToDouble(_col0)) >= 100.0) (type: boolean) - Statistics: Num rows: 4166 Data size: 80253 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + residual filter predicates: {((UDFToDouble(_col2) + UDFToDouble(_col0)) >= 100.0)} + Statistics: Num rows: 4166 Data size: 80253 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -1228,20 +1214,18 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 13750 Data size: 264875 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((UDFToDouble(_col4) + UDFToDouble(_col0)) <= 102.0) (type: boolean) - Statistics: Num rows: 4583 Data size: 88285 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 + residual filter predicates: {((UDFToDouble(_col4) + UDFToDouble(_col0)) <= 102.0)} + Statistics: Num rows: 4583 Data size: 88285 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1322,16 +1306,14 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 12500 Data size: 240800 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((UDFToDouble(_col2) + UDFToDouble(_col0)) >= 100.0) (type: boolean) - Statistics: Num rows: 4166 Data size: 80253 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + residual filter predicates: {((UDFToDouble(_col2) + UDFToDouble(_col0)) >= 100.0)} + Statistics: Num rows: 4166 Data size: 80253 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -1504,20 +1486,18 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 
- Statistics: Num rows: 13750 Data size: 264875 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((UDFToDouble(_col4) + UDFToDouble(_col0)) <= 102.0) (type: boolean) - Statistics: Num rows: 4583 Data size: 88285 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 + residual filter predicates: {((UDFToDouble(_col4) + UDFToDouble(_col0)) <= 102.0)} + Statistics: Num rows: 4583 Data size: 88285 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1677,24 +1657,22 @@ STAGE PLANS: 2 3 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 - Statistics: Num rows: 176000 Data size: 9396800 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((_col0 = _col24) or ((_col1 = _col25) and (_col26 = _col20))) and ((_col12 = _col24) or ((_col27 = _col15) and (_col13 = _col25))) and ((_col6 = _col24) or ((_col28 = _col16) and (_col8 = 42)))) (type: boolean) + residual filter predicates: {((_col6 = _col24) or ((_col28 = _col16) and (_col8 = 42)))} {((_col12 = _col24) or ((_col27 = _col15) and (_col13 = _col25)))} {((_col0 = _col24) or ((_col1 = _col25) and (_col26 = _col20)))} + Statistics: Num rows: 74250 Data size: 3964275 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col18 (type: string), _col19 (type: string), _col20 (type: int), _col21 (type: float), _col22 (type: boolean), _col23 (type: string), _col24 (type: string), _col25 (type: string), _col26 (type: int), _col27 (type: float), _col28 (type: boolean), _col29 (type: string), _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: boolean), _col5 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: float), _col16 (type: boolean), _col17 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: int), _col9 (type: float), _col10 (type: boolean), _col11 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 Statistics: Num rows: 74250 Data size: 3964275 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col18 (type: string), _col19 (type: string), _col20 (type: int), _col21 (type: float), _col22 (type: boolean), _col23 (type: string), _col24 (type: string), _col25 (type: string), _col26 (type: int), _col27 (type: float), _col28 (type: boolean), _col29 (type: 
string), _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: boolean), _col5 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: float), _col16 (type: boolean), _col17 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: int), _col9 (type: float), _col10 (type: boolean), _col11 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29 - Statistics: Num rows: 74250 Data size: 3964275 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 530 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 10 Data size: 530 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 530 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/join_alt_syntax.q.out ql/src/test/results/clientpositive/join_alt_syntax.q.out index 1c08e6a..8083b65 100644 --- ql/src/test/results/clientpositive/join_alt_syntax.q.out +++ ql/src/test/results/clientpositive/join_alt_syntax.q.out @@ -322,21 +322,19 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 728 Data size: 178830 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col0 + _col3) = _col3) (type: boolean) + residual filter predicates: {((_col0 + _col3) = _col3)} + Statistics: Num rows: 364 Data size: 89415 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col4 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 364 Data size: 89415 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col4 (type: string), _col1 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false Statistics: Num rows: 364 Data size: 89415 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 364 Data size: 89415 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/join_cond_pushdown_1.q.out ql/src/test/results/clientpositive/join_cond_pushdown_1.q.out index 831d84f..6ccc536 100644 --- ql/src/test/results/clientpositive/join_cond_pushdown_1.q.out 
+++ ql/src/test/results/clientpositive/join_cond_pushdown_1.q.out @@ -263,21 +263,19 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 728 Data size: 178830 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col0 + _col18) = _col18) (type: boolean) + residual filter predicates: {((_col0 + _col18) = _col18)} + Statistics: Num rows: 364 Data size: 89415 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 Statistics: Num rows: 364 Data size: 89415 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + File Output Operator + compressed: false Statistics: Num rows: 364 Data size: 89415 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 364 Data size: 89415 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/join_cond_pushdown_3.q.out ql/src/test/results/clientpositive/join_cond_pushdown_3.q.out index 560b8e0..db254b1 100644 --- ql/src/test/results/clientpositive/join_cond_pushdown_3.q.out +++ ql/src/test/results/clientpositive/join_cond_pushdown_3.q.out @@ -269,21 +269,19 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, 
_col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 - Statistics: Num rows: 728 Data size: 178830 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col0 + _col18) = _col18) (type: boolean) + residual filter predicates: {((_col0 + _col18) = _col18)} + Statistics: Num rows: 364 Data size: 89415 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 Statistics: Num rows: 364 Data size: 89415 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26 + File Output Operator + compressed: false Statistics: Num rows: 364 Data size: 89415 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 364 Data size: 89415 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/join_cond_pushdown_unqual1.q.out ql/src/test/results/clientpositive/join_cond_pushdown_unqual1.q.out index f77c75c..b60e782 100644 --- ql/src/test/results/clientpositive/join_cond_pushdown_unqual1.q.out +++ ql/src/test/results/clientpositive/join_cond_pushdown_unqual1.q.out @@ -277,16 +277,14 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 - Statistics: Num rows: 26 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Filter Operator - 
predicate: ((_col9 + _col0) = _col0) (type: boolean) - Statistics: Num rows: 13 Data size: 1586 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + residual filter predicates: {((_col9 + _col0) = _col0)} + Statistics: Num rows: 13 Data size: 1586 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce diff --git ql/src/test/results/clientpositive/join_cond_pushdown_unqual3.q.out ql/src/test/results/clientpositive/join_cond_pushdown_unqual3.q.out index f15d536..c24cea9 100644 --- ql/src/test/results/clientpositive/join_cond_pushdown_unqual3.q.out +++ ql/src/test/results/clientpositive/join_cond_pushdown_unqual3.q.out @@ -283,16 +283,14 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 - Statistics: Num rows: 26 Data size: 3173 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col9 + _col0) = _col0) (type: boolean) - Statistics: Num rows: 13 Data size: 1586 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + residual filter predicates: {((_col9 + _col0) = _col0)} + Statistics: Num rows: 13 Data size: 1586 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce diff --git ql/src/test/results/clientpositive/ppd_gby_join.q.out ql/src/test/results/clientpositive/ppd_gby_join.q.out index a160410..03767d5 100644 --- ql/src/test/results/clientpositive/ppd_gby_join.q.out +++ ql/src/test/results/clientpositive/ppd_gby_join.q.out @@ -65,26 +65,24 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col1 > '50') or (_col0 < '50')) (type: boolean) + residual filter predicates: {((_col1 > '50') or (_col0 < '50'))} + Statistics: Num rows: 4 Data size: 46 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 4 Data size: 46 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 4 Data size: 46 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 46 Basic 
stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -335,26 +333,24 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col1 > '50') or (_col0 < '50')) (type: boolean) + residual filter predicates: {((_col1 > '50') or (_col0 < '50'))} + Statistics: Num rows: 4 Data size: 46 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 4 Data size: 46 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 4 Data size: 46 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 46 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce diff --git ql/src/test/results/clientpositive/ppd_join.q.out ql/src/test/results/clientpositive/ppd_join.q.out index e48c5e2..c9625ad 100644 --- ql/src/test/results/clientpositive/ppd_join.q.out +++ ql/src/test/results/clientpositive/ppd_join.q.out @@ -63,21 +63,19 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col1 > '50') or (_col0 < '50')) (type: boolean) + residual filter predicates: {((_col1 > '50') or (_col0 < '50'))} + Statistics: Num rows: 4 Data size: 46 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 4 Data size: 46 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 4 Data size: 46 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 46 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -588,21 +586,19 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col1 > '50') or (_col0 < '50')) (type: boolean) + residual filter predicates: {((_col1 > '50') or (_col0 < '50'))} + Statistics: Num rows: 4 Data size: 46 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 4 Data size: 46 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 4 Data size: 46 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 46 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/ppd_join2.q.out ql/src/test/results/clientpositive/ppd_join2.q.out index 279435a..f37daa1 100644 --- ql/src/test/results/clientpositive/ppd_join2.q.out +++ ql/src/test/results/clientpositive/ppd_join2.q.out @@ -71,16 +71,14 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + residual filter predicates: {((_col0 <> '10') or (_col2 <> '10'))} Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col0 <> '10') or (_col2 <> '10')) (type: boolean) - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -1751,16 +1749,14 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + residual filter predicates: {((_col0 <> '10') or (_col2 <> '10'))} Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col0 <> '10') or (_col2 <> '10')) (type: boolean) - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce diff --git ql/src/test/results/clientpositive/ppd_join3.q.out ql/src/test/results/clientpositive/ppd_join3.q.out index cdf887e..5003e51 100644 --- ql/src/test/results/clientpositive/ppd_join3.q.out +++ ql/src/test/results/clientpositive/ppd_join3.q.out @@ -86,21 +86,19 @@ STAGE PLANS: 1 _col0 (type: string) 2 _col0 (type: string) outputColumnNames: _col1, _col2, _col3 + residual filter predicates: {((_col2 > '10') or (_col1 <> '10'))} Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col2 > '10') or (_col1 <> '10')) (type: boolean) + Select Operator + expressions: _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1797,21 +1795,19 @@ STAGE PLANS: 1 _col0 (type: string) 2 _col0 (type: string) outputColumnNames: _col1, _col2, _col3 + residual filter predicates: {((_col2 > '10') or (_col1 <> '10'))} Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col2 > '10') or (_col1 <> '10')) (type: boolean) + Select Operator + expressions: _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/ppd_join5.q.out 
ql/src/test/results/clientpositive/ppd_join5.q.out index 551769c..fa546c9 100644 --- ql/src/test/results/clientpositive/ppd_join5.q.out +++ ql/src/test/results/clientpositive/ppd_join5.q.out @@ -249,21 +249,19 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col3, _col4 + residual filter predicates: {((_col3 > 1) or (_col4 > 1))} Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col3 > 1) or (_col4 > 1)) (type: boolean) + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: int), _col4 (type: int) - outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator