diff --git ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
index 6651900..4af09f0 100644
--- ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
+++ ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
@@ -468,7 +468,7 @@
"requires \"AND \" on the 1st WHEN MATCHED clause of <{0}>", true),
MERGE_TOO_MANY_DELETE(10405, "MERGE statment can have at most 1 WHEN MATCHED ... DELETE clause: <{0}>", true),
MERGE_TOO_MANY_UPDATE(10406, "MERGE statment can have at most 1 WHEN MATCHED ... UPDATE clause: <{0}>", true),
- INVALID_JOIN_CONDITION(10407, "Error parsing condition in outer join"),
+ INVALID_JOIN_CONDITION(10407, "Error parsing condition in join"),
INVALID_TARGET_COLUMN_IN_SET_CLAUSE(10408, "Target column \"{0}\" of set clause is not found in table \"{1}\".", true),
HIVE_GROUPING_FUNCTION_EXPR_NOT_IN_GROUPBY(10409, "Expression in GROUPING function not present in GROUP BY"),
//========================== 20000 range starts here ========================//
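The 10407 message drops the word "outer" because, after this patch, residual join conditions can arise for inner joins as well. For reference, these ErrorMsg entries are raised via getMsg(), as the STATISTICS_CLONING_FAILED call later in this patch shows; a minimal sketch of such a call site (the surrounding check and class are hypothetical, only the getMsg() usage mirrors the patch):

```java
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.parse.SemanticException;

class JoinConditionCheck {
  // Raises the generalized 10407 error; it now applies to inner and outer joins alike.
  static void failIfUnparseable(boolean parsed) throws SemanticException {
    if (!parsed) {
      throw new SemanticException(ErrorMsg.INVALID_JOIN_CONDITION.getMsg());
    }
  }
}
```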
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java
index 07fd653..3573d07 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java
@@ -361,13 +361,6 @@ protected void initializeOp(Configuration hconf) throws HiveException {
// Create post-filtering evaluators if needed
if (conf.getResidualFilterExprs() != null) {
- // Currently residual filter expressions are only used with outer joins, thus
- // we add this safeguard.
- // TODO: Remove this guard when support for residual expressions can be present
- // for inner joins too. This would be added to improve efficiency in the evaluation
- // of certain joins, since we will not be emitting rows which are thrown away by
- // filter straight away.
- assert !noOuterJoin;
residualJoinFilters = new ArrayList<>(conf.getResidualFilterExprs().size());
residualJoinFiltersOIs = new ArrayList<>(conf.getResidualFilterExprs().size());
for (int i = 0; i < conf.getResidualFilterExprs().size(); i++) {
@@ -377,10 +370,12 @@ protected void initializeOp(Configuration hconf) throws HiveException {
residualJoinFilters.get(i).initialize(outputObjInspector));
}
needsPostEvaluation = true;
- // We need to disable join emit interval, since for outer joins with post conditions
- // we need to have the full view on the right matching rows to know whether we need
- // to produce a row with NULL values or not
- joinEmitInterval = -1;
+ if (!noOuterJoin) {
+ // We need to disable join emit interval, since for outer joins with post conditions
+ // we need to have the full view on the right matching rows to know whether we need
+ // to produce a row with NULL values or not
+ joinEmitInterval = -1;
+ }
}
if (LOG.isInfoEnabled()) {
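With the assert removed, residual filters now also run for inner joins. For an inner join, a row that fails the residual predicates is simply discarded, so the operator can keep streaming output; only outer joins still need joinEmitInterval disabled, because a NULL-padded row can be emitted only after all matching right-side rows have been seen. A minimal sketch of that distinction, using plain java.util.function.Predicate instead of Hive's ExprNodeEvaluator machinery (all names here are illustrative):

```java
import java.util.List;
import java.util.function.Predicate;

class ResidualFilterSketch<Row> {
  private final List<Predicate<Row>> residualJoinFilters;

  ResidualFilterSketch(List<Predicate<Row>> residualJoinFilters) {
    this.residualJoinFilters = residualJoinFilters;
  }

  // Inner join: a row failing any residual predicate is simply dropped,
  // so rows can be streamed out without buffering the full match set.
  boolean emit(Row joinedRow) {
    for (Predicate<Row> filter : residualJoinFilters) {
      if (!filter.test(joinedRow)) {
        return false; // discard; no NULL-padded row is ever needed
      }
    }
    return true;
  }
}
```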
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 737aad1..72bdc71 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -1890,7 +1890,7 @@ private boolean validateMapJoinDesc(MapJoinDesc desc) {
return false;
}
if (desc.getResidualFilterExprs() != null && !desc.getResidualFilterExprs().isEmpty()) {
- LOG.info("Cannot vectorize outer join with complex ON clause");
+ LOG.info("Cannot vectorize join with complex ON clause");
return false;
}
return true;
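(Only the log text changes here: the guard itself already rejected any join with residual filter expressions, which previously could only be an outer join; with this patch the same non-vectorizable case can now be an inner join too.)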
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index fc6adaf..ae32a28 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -35,6 +35,7 @@
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.CommonJoinOperator;
import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
import org.apache.hadoop.hive.ql.exec.LimitOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
@@ -312,9 +313,9 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
return null;
}
- private long evaluateExpression(Statistics stats, ExprNodeDesc pred,
+ protected long evaluateExpression(Statistics stats, ExprNodeDesc pred,
AnnotateStatsProcCtx aspCtx, List<String> neededCols,
- FilterOperator fop, long evaluatedRowCount) throws CloneNotSupportedException, SemanticException {
+ Operator<?> op, long evaluatedRowCount) throws CloneNotSupportedException, SemanticException {
long newNumRows = 0;
Statistics andStats = null;
@@ -338,11 +339,11 @@ private long evaluateExpression(Statistics stats, ExprNodeDesc pred,
// evaluate children
for (ExprNodeDesc child : genFunc.getChildren()) {
newNumRows = evaluateChildExpr(aspCtx.getAndExprStats(), child,
- aspCtx, neededCols, fop, evaluatedRowCount);
+ aspCtx, neededCols, op, evaluatedRowCount);
if (satisfyPrecondition(aspCtx.getAndExprStats())) {
- updateStats(aspCtx.getAndExprStats(), newNumRows, true, fop);
+ updateStats(aspCtx.getAndExprStats(), newNumRows, true, op);
} else {
- updateStats(aspCtx.getAndExprStats(), newNumRows, false, fop);
+ updateStats(aspCtx.getAndExprStats(), newNumRows, false, op);
}
}
} else if (udf instanceof GenericUDFOPOr) {
@@ -353,24 +354,24 @@ private long evaluateExpression(Statistics stats, ExprNodeDesc pred,
evaluatedRowCount = stats.getNumRows();
} else {
newNumRows = StatsUtils.safeAdd(
- evaluateChildExpr(stats, child, aspCtx, neededCols, fop, evaluatedRowCount),
+ evaluateChildExpr(stats, child, aspCtx, neededCols, op, evaluatedRowCount),
newNumRows);
evaluatedRowCount = newNumRows;
}
}
} else if (udf instanceof GenericUDFIn) {
// for IN clause
- newNumRows = evaluateInExpr(stats, pred, aspCtx, neededCols, fop);
+ newNumRows = evaluateInExpr(stats, pred, aspCtx, neededCols, op);
} else if (udf instanceof GenericUDFBetween) {
// for BETWEEN clause
- newNumRows = evaluateBetweenExpr(stats, pred, aspCtx, neededCols, fop);
+ newNumRows = evaluateBetweenExpr(stats, pred, aspCtx, neededCols, op);
} else if (udf instanceof GenericUDFOPNot) {
- newNumRows = evaluateNotExpr(stats, pred, aspCtx, neededCols, fop);
+ newNumRows = evaluateNotExpr(stats, pred, aspCtx, neededCols, op);
} else if (udf instanceof GenericUDFOPNotNull) {
return evaluateNotNullExpr(stats, genFunc);
} else {
// single predicate condition
- newNumRows = evaluateChildExpr(stats, pred, aspCtx, neededCols, fop, evaluatedRowCount);
+ newNumRows = evaluateChildExpr(stats, pred, aspCtx, neededCols, op, evaluatedRowCount);
}
} else if (pred instanceof ExprNodeColumnDesc) {
@@ -410,7 +411,7 @@ private long evaluateExpression(Statistics stats, ExprNodeDesc pred,
}
private long evaluateInExpr(Statistics stats, ExprNodeDesc pred, AnnotateStatsProcCtx aspCtx,
- List<String> neededCols, FilterOperator fop) throws SemanticException {
+ List<String> neededCols, Operator<?> op) throws SemanticException {
long numRows = stats.getNumRows();
@@ -500,7 +501,7 @@ private long evaluateInExpr(Statistics stats, ExprNodeDesc pred, AnnotateStatsPr
}
private long evaluateBetweenExpr(Statistics stats, ExprNodeDesc pred, AnnotateStatsProcCtx aspCtx,
- List<String> neededCols, FilterOperator fop) throws SemanticException, CloneNotSupportedException {
+ List<String> neededCols, Operator<?> op) throws SemanticException, CloneNotSupportedException {
final ExprNodeGenericFuncDesc fd = (ExprNodeGenericFuncDesc) pred;
final boolean invert = Boolean.TRUE.equals(
((ExprNodeConstantDesc) fd.getChildren().get(0)).getValue()); // boolean invert (not)
@@ -528,11 +529,11 @@ private long evaluateBetweenExpr(Statistics stats, ExprNodeDesc pred, AnnotateSt
new GenericUDFOPNot(), Lists.newArrayList(newExpression));
}
- return evaluateExpression(stats, newExpression, aspCtx, neededCols, fop, 0);
+ return evaluateExpression(stats, newExpression, aspCtx, neededCols, op, 0);
}
private long evaluateNotExpr(Statistics stats, ExprNodeDesc pred,
- AnnotateStatsProcCtx aspCtx, List<String> neededCols, FilterOperator fop)
+ AnnotateStatsProcCtx aspCtx, List<String> neededCols, Operator<?> op)
throws CloneNotSupportedException, SemanticException {
long numRows = stats.getNumRows();
@@ -547,7 +548,7 @@ private long evaluateNotExpr(Statistics stats, ExprNodeDesc pred,
long newNumRows = 0;
for (ExprNodeDesc child : genFunc.getChildren()) {
newNumRows = evaluateChildExpr(stats, child, aspCtx, neededCols,
- fop, 0);
+ op, 0);
}
return numRows - newNumRows;
} else if (leaf instanceof ExprNodeConstantDesc) {
@@ -832,7 +833,7 @@ private long evaluateComparator(Statistics stats, ExprNodeGenericFuncDesc genFun
private long evaluateChildExpr(Statistics stats, ExprNodeDesc child,
AnnotateStatsProcCtx aspCtx, List<String> neededCols,
- FilterOperator fop, long evaluatedRowCount) throws CloneNotSupportedException, SemanticException {
+ Operator<?> op, long evaluatedRowCount) throws CloneNotSupportedException, SemanticException {
long numRows = stats.getNumRows();
@@ -919,7 +920,7 @@ private long evaluateChildExpr(Statistics stats, ExprNodeDesc child,
} else if (udf instanceof GenericUDFOPAnd || udf instanceof GenericUDFOPOr
|| udf instanceof GenericUDFIn || udf instanceof GenericUDFBetween
|| udf instanceof GenericUDFOPNot) {
- return evaluateExpression(stats, genFunc, aspCtx, neededCols, fop, evaluatedRowCount);
+ return evaluateExpression(stats, genFunc, aspCtx, neededCols, op, evaluatedRowCount);
} else if (udf instanceof GenericUDFInBloomFilter) {
if (genFunc.getChildren().get(1) instanceof ExprNodeDynamicValueDesc) {
// Synthetic predicates from semijoin opt should not affect stats.
@@ -1405,7 +1406,7 @@ private boolean checkMapSideAggregation(GroupByOperator gop,
* "Database Systems: The Complete Book" by Garcia-Molina et. al.
*
*/
- public static class JoinStatsRule extends DefaultStatsRule implements NodeProcessor {
+ public static class JoinStatsRule extends FilterStatsRule implements NodeProcessor {
@Override
@@ -1542,8 +1543,37 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
// update join statistics
stats.setColumnStats(outColStats);
- long newRowCount = inferredRowCount !=-1 ? inferredRowCount : computeNewRowCount(rowCounts, denom, jop);
- updateColStats(conf, stats, newRowCount, jop, rowCountParents);
+ long joinRowCount = inferredRowCount !=-1 ? inferredRowCount : computeNewRowCount(rowCounts, denom, jop);
+ updateColStats(conf, stats, joinRowCount, jop, rowCountParents);
+
+ // evaluate filter expression and update statistics
+ if (joinRowCount != -1 && jop.getConf().getNoOuterJoin() &&
+ jop.getConf().getResidualFilterExprs() != null &&
+ !jop.getConf().getResidualFilterExprs().isEmpty()) {
+ ExprNodeDesc pred;
+ if (jop.getConf().getResidualFilterExprs().size() > 1) {
+ pred = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
+ FunctionRegistry.getGenericUDFForAnd(),
+ jop.getConf().getResidualFilterExprs());
+ } else {
+ pred = jop.getConf().getResidualFilterExprs().get(0);
+ }
+ // evaluate filter expression and update statistics
+ try {
+ newNumRows = evaluateExpression(stats, pred,
+ aspCtx, jop.getSchema().getColumnNames(), jop, 0);
+ } catch (CloneNotSupportedException e) {
+ throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
+ }
+ // update statistics based on column statistics.
+ // OR conditions keep adding the stats independently; this may
+ // result in the number of rows exceeding the input rows, in
+ // which case the stats need not be updated
+ if (newNumRows <= joinRowCount) {
+ updateStats(stats, newNumRows, true, jop);
+ }
+ }
+
jop.setStatistics(stats);
if (LOG.isDebugEnabled()) {
@@ -1599,9 +1629,37 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
newNumRows = StatsUtils.safeMult(StatsUtils.safeMult(maxRowCount, (numParents - 1)), joinFactor);
newDataSize = StatsUtils.safeMult(StatsUtils.safeMult(maxDataSize, (numParents - 1)), joinFactor);
}
+
Statistics wcStats = new Statistics();
wcStats.setNumRows(newNumRows);
wcStats.setDataSize(newDataSize);
+
+ // evaluate filter expression and update statistics
+ if (jop.getConf().getNoOuterJoin() &&
+ jop.getConf().getResidualFilterExprs() != null &&
+ !jop.getConf().getResidualFilterExprs().isEmpty()) {
+ long joinRowCount = newNumRows;
+ ExprNodeDesc pred;
+ if (jop.getConf().getResidualFilterExprs().size() > 1) {
+ pred = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
+ FunctionRegistry.getGenericUDFForAnd(),
+ jop.getConf().getResidualFilterExprs());
+ } else {
+ pred = jop.getConf().getResidualFilterExprs().get(0);
+ }
+ // evaluate filter expression and update statistics
+ try {
+ newNumRows = evaluateExpression(wcStats, pred,
+ aspCtx, jop.getSchema().getColumnNames(), jop, 0);
+ } catch (CloneNotSupportedException e) {
+ throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
+ }
+ // update only the basic statistics in the absence of column statistics
+ if (newNumRows <= joinRowCount) {
+ updateStats(wcStats, newNumRows, false, jop);
+ }
+ }
+
jop.setStatistics(wcStats);
if (LOG.isDebugEnabled()) {
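JoinStatsRule now extends FilterStatsRule so the filter selectivity logic in evaluateExpression can be reused for the join's residual predicates. When more than one residual expression is present, they are first folded into a single AND, exactly as in the hunks above; a self-contained sketch of just that folding step (the ResidualStats class and method name are ours, the calls are the ones used in the patch):

```java
import java.util.List;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

class ResidualStats {
  // Fold a list of residual predicates into one boolean conjunction so the
  // existing filter-selectivity code can evaluate them as a single expression.
  static ExprNodeDesc combineResiduals(List<ExprNodeDesc> residuals) {
    if (residuals.size() > 1) {
      return new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
          FunctionRegistry.getGenericUDFForAnd(), residuals);
    }
    return residuals.get(0);
  }
}
```

The `newNumRows <= joinRowCount` guard exists because OR branches are estimated independently and summed, so the combined estimate can exceed the join's own row count; in that case the filtered statistics are simply not applied.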
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 9e84a29..e36c616 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -8143,11 +8143,12 @@ private Operator genJoinOperatorChildren(QBJoinTree join, Operator left,
join.getNoOuterJoin(), joinCondns, filterMap, joinKeys, null);
desc.setReversedExprs(reversedExprs);
desc.setFilterMap(join.getFilterMap());
- // For outer joins, add filters that apply to more than one input
- if (!join.getNoOuterJoin() && join.getPostJoinFilters().size() != 0) {
+ // Add filters that apply to more than one input
+ if (join.getPostJoinFilters().size() != 0) {
+ LOG.debug("Generate JOIN with post-filtering conditions");
List<ExprNodeDesc> residualFilterExprs = new ArrayList<ExprNodeDesc>();
for (ASTNode cond : join.getPostJoinFilters()) {
- residualFilterExprs.add(genExprNodeDesc(cond, outputRR));
+ residualFilterExprs.add(genExprNodeDesc(cond, outputRR, false, isCBOExecuted()));
}
desc.setResidualFilterExprs(residualFilterExprs);
// Clean post-conditions
@@ -8357,15 +8358,7 @@ private Operator genJoinOperator(QB qb, QBJoinTree joinTree,
if (joinTree.getPostJoinFilters().size() != 0) {
// Safety check for postconditions
- assert joinTree.getNoOuterJoin();
- Operator op = joinOp;
- for(ASTNode condn : joinTree.getPostJoinFilters()) {
- op = genFilterPlan(qb, condn, op, false);
- if (LOG.isDebugEnabled()) {
- LOG.debug("Generated " + op + " with post-filtering conditions after JOIN operator");
- }
- }
- return op;
+ throw new SemanticException("Post-filtering conditions should have been added to the JOIN operator");
}
return joinOp;
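Post-join filters are now attached to the JoinDesc for every join type, and any filter still pending when genJoinOperator runs is treated as a bug rather than patched over with an extra FilterOperator. The underlying placement rule: a predicate that references columns from more than one join input cannot be pushed below the join and must become a residual filter on the join itself. A toy model of that rule (all names here are ours, not Hive's):

```java
import java.util.ArrayList;
import java.util.List;
import java.util.Set;

class PredicatePlacement {
  record Pred(String text, Set<Integer> inputsReferenced) {}

  // Returns the predicates that must stay on the join as residual filters;
  // single-input predicates are pushed down into perInput instead.
  static List<Pred> residuals(List<Pred> preds, List<List<Pred>> perInput) {
    List<Pred> residual = new ArrayList<>();
    for (Pred p : preds) {
      if (p.inputsReferenced().size() > 1) {
        residual.add(p);                // must be evaluated after the join
      } else {
        int input = p.inputsReferenced().iterator().next();
        perInput.get(input).add(p);     // ordinary pushed-down filter
      }
    }
    return residual;
  }
}
```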
diff --git ql/src/test/results/clientpositive/join43.q.out ql/src/test/results/clientpositive/join43.q.out
index 24168ca..06f59cc 100644
--- ql/src/test/results/clientpositive/join43.q.out
+++ ql/src/test/results/clientpositive/join43.q.out
@@ -258,22 +258,20 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (_col1 > _col3) (type: boolean)
+ residual filter predicates: {(_col1 > _col3)}
+ Statistics: Num rows: 2 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: max(_col3)
+ keys: _col0 (type: string), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 2 Data size: 13 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: max(_col3)
- keys: _col0 (type: string), _col1 (type: int)
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 2 Data size: 13 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-2
Map Reduce
@@ -476,22 +474,20 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (_col1 > _col3) (type: boolean)
+ residual filter predicates: {(_col1 > _col3)}
+ Statistics: Num rows: 2 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: max(_col3)
+ keys: _col0 (type: string), _col1 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 2 Data size: 13 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: max(_col3)
- keys: _col0 (type: string), _col1 (type: int)
- mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 2 Data size: 13 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-2
Map Reduce
diff --git ql/src/test/results/clientpositive/join45.q.out ql/src/test/results/clientpositive/join45.q.out
index 3c373e6..20dce5d 100644
--- ql/src/test/results/clientpositive/join45.q.out
+++ ql/src/test/results/clientpositive/join45.q.out
@@ -359,20 +359,18 @@ STAGE PLANS:
0
1
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 12500 Data size: 240800 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((_col0 = _col2) or UDFToDouble(_col1) BETWEEN 100.0 AND 102.0 or UDFToDouble(_col3) BETWEEN 100.0 AND 102.0) (type: boolean)
- Statistics: Num rows: 9026 Data size: 173876 Basic stats: COMPLETE Column stats: NONE
- Limit
- Number of rows: 10
+ residual filter predicates: {((_col0 = _col2) or UDFToDouble(_col1) BETWEEN 100.0 AND 102.0 or UDFToDouble(_col3) BETWEEN 100.0 AND 102.0)}
+ Statistics: Num rows: 9026 Data size: 173876 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -464,20 +462,18 @@ STAGE PLANS:
0
1
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 12500 Data size: 240800 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (((UDFToDouble(_col0) + UDFToDouble(_col2)) <= 102.0) and ((UDFToDouble(_col0) + UDFToDouble(_col2)) >= 100.0)) (type: boolean)
- Statistics: Num rows: 1388 Data size: 26738 Basic stats: COMPLETE Column stats: NONE
- Limit
- Number of rows: 10
+ residual filter predicates: {((UDFToDouble(_col0) + UDFToDouble(_col2)) >= 100.0)} {((UDFToDouble(_col0) + UDFToDouble(_col2)) <= 102.0)}
+ Statistics: Num rows: 1388 Data size: 26738 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -563,20 +559,18 @@ STAGE PLANS:
0
1
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 12500 Data size: 240800 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (((UDFToDouble(_col0) + UDFToDouble(_col2)) >= 100.0) or ((UDFToDouble(_col0) + UDFToDouble(_col2)) <= 102.0)) (type: boolean)
- Statistics: Num rows: 8332 Data size: 160507 Basic stats: COMPLETE Column stats: NONE
- Limit
- Number of rows: 10
+ residual filter predicates: {(((UDFToDouble(_col0) + UDFToDouble(_col2)) >= 100.0) or ((UDFToDouble(_col0) + UDFToDouble(_col2)) <= 102.0))}
+ Statistics: Num rows: 8332 Data size: 160507 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -664,20 +658,18 @@ STAGE PLANS:
0
1
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 12500 Data size: 240800 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (struct(_col0,_col2)) IN (const struct(100,100), const struct(101,101), const struct(102,102)) (type: boolean)
- Statistics: Num rows: 3125 Data size: 60200 Basic stats: COMPLETE Column stats: NONE
- Limit
- Number of rows: 10
+ residual filter predicates: {(struct(_col0,_col2)) IN (const struct(100,100), const struct(101,101), const struct(102,102))}
+ Statistics: Num rows: 3125 Data size: 60200 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -795,24 +787,22 @@ STAGE PLANS:
0
1
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 13750 Data size: 264875 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((UDFToDouble(_col4) + UDFToDouble(_col0)) >= 100.0) (type: boolean)
+ residual filter predicates: {((UDFToDouble(_col4) + UDFToDouble(_col0)) >= 100.0)}
+ Statistics: Num rows: 4583 Data size: 88285 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string), _col2 (type: string), _col3 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 4583 Data size: 88285 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string), _col2 (type: string), _col3 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 4583 Data size: 88285 Basic stats: COMPLETE Column stats: NONE
- Limit
- Number of rows: 10
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -942,20 +932,18 @@ STAGE PLANS:
0
1
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 13750 Data size: 264875 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((UDFToDouble(_col4) + UDFToDouble(_col0)) <= 102.0) (type: boolean)
- Statistics: Num rows: 4583 Data size: 88285 Basic stats: COMPLETE Column stats: NONE
- Limit
- Number of rows: 10
+ residual filter predicates: {((UDFToDouble(_col4) + UDFToDouble(_col0)) <= 102.0)}
+ Statistics: Num rows: 4583 Data size: 88285 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -1046,16 +1034,14 @@ STAGE PLANS:
0
1
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 12500 Data size: 240800 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((UDFToDouble(_col2) + UDFToDouble(_col0)) >= 100.0) (type: boolean)
- Statistics: Num rows: 4166 Data size: 80253 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ residual filter predicates: {((UDFToDouble(_col2) + UDFToDouble(_col0)) >= 100.0)}
+ Statistics: Num rows: 4166 Data size: 80253 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-2
Map Reduce
@@ -1228,20 +1214,18 @@ STAGE PLANS:
0
1
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 13750 Data size: 264875 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((UDFToDouble(_col4) + UDFToDouble(_col0)) <= 102.0) (type: boolean)
- Statistics: Num rows: 4583 Data size: 88285 Basic stats: COMPLETE Column stats: NONE
- Limit
- Number of rows: 10
+ residual filter predicates: {((UDFToDouble(_col4) + UDFToDouble(_col0)) <= 102.0)}
+ Statistics: Num rows: 4583 Data size: 88285 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -1322,16 +1306,14 @@ STAGE PLANS:
0
1
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 12500 Data size: 240800 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((UDFToDouble(_col2) + UDFToDouble(_col0)) >= 100.0) (type: boolean)
- Statistics: Num rows: 4166 Data size: 80253 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ residual filter predicates: {((UDFToDouble(_col2) + UDFToDouble(_col0)) >= 100.0)}
+ Statistics: Num rows: 4166 Data size: 80253 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-2
Map Reduce
@@ -1504,20 +1486,18 @@ STAGE PLANS:
0
1
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 13750 Data size: 264875 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((UDFToDouble(_col4) + UDFToDouble(_col0)) <= 102.0) (type: boolean)
- Statistics: Num rows: 4583 Data size: 88285 Basic stats: COMPLETE Column stats: NONE
- Limit
- Number of rows: 10
+ residual filter predicates: {((UDFToDouble(_col4) + UDFToDouble(_col0)) <= 102.0)}
+ Statistics: Num rows: 4583 Data size: 88285 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 10 Data size: 190 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -1677,24 +1657,22 @@ STAGE PLANS:
2
3
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29
- Statistics: Num rows: 176000 Data size: 9396800 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: (((_col0 = _col24) or ((_col1 = _col25) and (_col26 = _col20))) and ((_col12 = _col24) or ((_col27 = _col15) and (_col13 = _col25))) and ((_col6 = _col24) or ((_col28 = _col16) and (_col8 = 42)))) (type: boolean)
+ residual filter predicates: {((_col6 = _col24) or ((_col28 = _col16) and (_col8 = 42)))} {((_col12 = _col24) or ((_col27 = _col15) and (_col13 = _col25)))} {((_col0 = _col24) or ((_col1 = _col25) and (_col26 = _col20)))}
+ Statistics: Num rows: 74250 Data size: 3964275 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col18 (type: string), _col19 (type: string), _col20 (type: int), _col21 (type: float), _col22 (type: boolean), _col23 (type: string), _col24 (type: string), _col25 (type: string), _col26 (type: int), _col27 (type: float), _col28 (type: boolean), _col29 (type: string), _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: boolean), _col5 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: float), _col16 (type: boolean), _col17 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: int), _col9 (type: float), _col10 (type: boolean), _col11 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29
Statistics: Num rows: 74250 Data size: 3964275 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col18 (type: string), _col19 (type: string), _col20 (type: int), _col21 (type: float), _col22 (type: boolean), _col23 (type: string), _col24 (type: string), _col25 (type: string), _col26 (type: int), _col27 (type: float), _col28 (type: boolean), _col29 (type: string), _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: float), _col4 (type: boolean), _col5 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: float), _col16 (type: boolean), _col17 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: int), _col9 (type: float), _col10 (type: boolean), _col11 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29
- Statistics: Num rows: 74250 Data size: 3964275 Basic stats: COMPLETE Column stats: NONE
- Limit
- Number of rows: 10
+ Limit
+ Number of rows: 10
+ Statistics: Num rows: 10 Data size: 530 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
Statistics: Num rows: 10 Data size: 530 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 10 Data size: 530 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
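Across the golden-file updates above, the standalone Filter Operator disappears and its predicate reappears on the join as `residual filter predicates`, with the row estimate now computed at the join itself. The estimates are consistent with Hive's default selectivities when column statistics are absent — our reading, not stated in the patch: an equality keeps 1/2 of the rows, a range comparison keeps 1/3, AND multiplies branch selectivities, and OR sums branch estimates. A quick check against the numbers in these plans:

```java
// Recomputing the plan estimates above under the assumed defaults
// (equality -> 1/2 of rows, range comparison -> 1/3; AND multiplies, OR sums).
public class SelectivityCheck {
  public static void main(String[] args) {
    System.out.println(6 / 3);                   // join43: 6 -> 2 for (_col1 > _col3)
    System.out.println(12500 / 3);               // join45: 12500 -> 4166, one comparison
    System.out.println(13750 / 3);               // join45: 13750 -> 4583
    System.out.println(12500 / 3 / 3);           // BETWEEN = AND of two comparisons -> 1388
    System.out.println(12500 / 3 + 12500 / 3);   // OR of two comparisons -> 8332
    System.out.println(12500 / 2 + 1388 + 1388); // equality OR two BETWEENs -> 9026
  }
}
```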
diff --git ql/src/test/results/clientpositive/join_alt_syntax.q.out ql/src/test/results/clientpositive/join_alt_syntax.q.out
index 1c08e6a..8083b65 100644
--- ql/src/test/results/clientpositive/join_alt_syntax.q.out
+++ ql/src/test/results/clientpositive/join_alt_syntax.q.out
@@ -322,21 +322,19 @@ STAGE PLANS:
0
1
outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 728 Data size: 178830 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((_col0 + _col3) = _col3) (type: boolean)
+ residual filter predicates: {((_col0 + _col3) = _col3)}
+ Statistics: Num rows: 364 Data size: 89415 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col4 (type: string), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 364 Data size: 89415 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col4 (type: string), _col1 (type: string), _col2 (type: string)
- outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
Statistics: Num rows: 364 Data size: 89415 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 364 Data size: 89415 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
diff --git ql/src/test/results/clientpositive/join_cond_pushdown_1.q.out ql/src/test/results/clientpositive/join_cond_pushdown_1.q.out
index 831d84f..6ccc536 100644
--- ql/src/test/results/clientpositive/join_cond_pushdown_1.q.out
+++ ql/src/test/results/clientpositive/join_cond_pushdown_1.q.out
@@ -263,21 +263,19 @@ STAGE PLANS:
0
1
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26
- Statistics: Num rows: 728 Data size: 178830 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((_col0 + _col18) = _col18) (type: boolean)
+ residual filter predicates: {((_col0 + _col18) = _col18)}
+ Statistics: Num rows: 364 Data size: 89415 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26
Statistics: Num rows: 364 Data size: 89415 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26
+ File Output Operator
+ compressed: false
Statistics: Num rows: 364 Data size: 89415 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 364 Data size: 89415 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
diff --git ql/src/test/results/clientpositive/join_cond_pushdown_3.q.out ql/src/test/results/clientpositive/join_cond_pushdown_3.q.out
index 560b8e0..db254b1 100644
--- ql/src/test/results/clientpositive/join_cond_pushdown_3.q.out
+++ ql/src/test/results/clientpositive/join_cond_pushdown_3.q.out
@@ -269,21 +269,19 @@ STAGE PLANS:
0
1
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26
- Statistics: Num rows: 728 Data size: 178830 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((_col0 + _col18) = _col18) (type: boolean)
+ residual filter predicates: {((_col0 + _col18) = _col18)}
+ Statistics: Num rows: 364 Data size: 89415 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26
Statistics: Num rows: 364 Data size: 89415 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col18 (type: int), _col19 (type: string), _col20 (type: string), _col21 (type: string), _col22 (type: string), _col23 (type: int), _col24 (type: string), _col25 (type: double), _col26 (type: string), _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: int), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: int), _col15 (type: string), _col16 (type: double), _col17 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26
+ File Output Operator
+ compressed: false
Statistics: Num rows: 364 Data size: 89415 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 364 Data size: 89415 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
diff --git ql/src/test/results/clientpositive/join_cond_pushdown_unqual1.q.out ql/src/test/results/clientpositive/join_cond_pushdown_unqual1.q.out
index f77c75c..b60e782 100644
--- ql/src/test/results/clientpositive/join_cond_pushdown_unqual1.q.out
+++ ql/src/test/results/clientpositive/join_cond_pushdown_unqual1.q.out
@@ -277,16 +277,14 @@ STAGE PLANS:
0
1
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17
- Statistics: Num rows: 26 Data size: 3173 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((_col9 + _col0) = _col0) (type: boolean)
- Statistics: Num rows: 13 Data size: 1586 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ residual filter predicates: {((_col9 + _col0) = _col0)}
+ Statistics: Num rows: 13 Data size: 1586 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-2
Map Reduce
diff --git ql/src/test/results/clientpositive/join_cond_pushdown_unqual3.q.out ql/src/test/results/clientpositive/join_cond_pushdown_unqual3.q.out
index f15d536..c24cea9 100644
--- ql/src/test/results/clientpositive/join_cond_pushdown_unqual3.q.out
+++ ql/src/test/results/clientpositive/join_cond_pushdown_unqual3.q.out
@@ -283,16 +283,14 @@ STAGE PLANS:
0
1
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17
- Statistics: Num rows: 26 Data size: 3173 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((_col9 + _col0) = _col0) (type: boolean)
- Statistics: Num rows: 13 Data size: 1586 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ residual filter predicates: {((_col9 + _col0) = _col0)}
+ Statistics: Num rows: 13 Data size: 1586 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-2
Map Reduce
diff --git ql/src/test/results/clientpositive/ppd_gby_join.q.out ql/src/test/results/clientpositive/ppd_gby_join.q.out
index a160410..03767d5 100644
--- ql/src/test/results/clientpositive/ppd_gby_join.q.out
+++ ql/src/test/results/clientpositive/ppd_gby_join.q.out
@@ -65,26 +65,24 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((_col1 > '50') or (_col0 < '50')) (type: boolean)
+ residual filter predicates: {((_col1 > '50') or (_col0 < '50'))}
+ Statistics: Num rows: 4 Data size: 46 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 4 Data size: 46 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 4 Data size: 46 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(1)
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 4 Data size: 46 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-2
Map Reduce
@@ -335,26 +333,24 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((_col1 > '50') or (_col0 < '50')) (type: boolean)
+ residual filter predicates: {((_col1 > '50') or (_col0 < '50'))}
+ Statistics: Num rows: 4 Data size: 46 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: _col0
Statistics: Num rows: 4 Data size: 46 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string)
- outputColumnNames: _col0
+ Group By Operator
+ aggregations: count(1)
+ keys: _col0 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 4 Data size: 46 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: count(1)
- keys: _col0 (type: string)
- mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 4 Data size: 46 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-2
Map Reduce
diff --git ql/src/test/results/clientpositive/ppd_join.q.out ql/src/test/results/clientpositive/ppd_join.q.out
index e48c5e2..c9625ad 100644
--- ql/src/test/results/clientpositive/ppd_join.q.out
+++ ql/src/test/results/clientpositive/ppd_join.q.out
@@ -63,21 +63,19 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((_col1 > '50') or (_col0 < '50')) (type: boolean)
+ residual filter predicates: {((_col1 > '50') or (_col0 < '50'))}
+ Statistics: Num rows: 4 Data size: 46 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 4 Data size: 46 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col2 (type: string)
- outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
Statistics: Num rows: 4 Data size: 46 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 4 Data size: 46 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -588,21 +586,19 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 69 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((_col1 > '50') or (_col0 < '50')) (type: boolean)
+ residual filter predicates: {((_col1 > '50') or (_col0 < '50'))}
+ Statistics: Num rows: 4 Data size: 46 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col2 (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 4 Data size: 46 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col2 (type: string)
- outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
Statistics: Num rows: 4 Data size: 46 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 4 Data size: 46 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
diff --git ql/src/test/results/clientpositive/ppd_join2.q.out ql/src/test/results/clientpositive/ppd_join2.q.out
index 279435a..f37daa1 100644
--- ql/src/test/results/clientpositive/ppd_join2.q.out
+++ ql/src/test/results/clientpositive/ppd_join2.q.out
@@ -71,16 +71,14 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
+ residual filter predicates: {((_col0 <> '10') or (_col2 <> '10'))}
Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((_col0 <> '10') or (_col2 <> '10')) (type: boolean)
- Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-2
Map Reduce
@@ -1751,16 +1749,14 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
+ residual filter predicates: {((_col0 <> '10') or (_col2 <> '10'))}
Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((_col0 <> '10') or (_col2 <> '10')) (type: boolean)
- Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-2
Map Reduce
diff --git ql/src/test/results/clientpositive/ppd_join3.q.out ql/src/test/results/clientpositive/ppd_join3.q.out
index cdf887e..5003e51 100644
--- ql/src/test/results/clientpositive/ppd_join3.q.out
+++ ql/src/test/results/clientpositive/ppd_join3.q.out
@@ -86,21 +86,19 @@ STAGE PLANS:
1 _col0 (type: string)
2 _col0 (type: string)
outputColumnNames: _col1, _col2, _col3
+ residual filter predicates: {((_col2 > '10') or (_col1 <> '10'))}
Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((_col2 > '10') or (_col1 <> '10')) (type: boolean)
+ Select Operator
+ expressions: _col1 (type: string), _col3 (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col1 (type: string), _col3 (type: string)
- outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -1797,21 +1795,19 @@ STAGE PLANS:
1 _col0 (type: string)
2 _col0 (type: string)
outputColumnNames: _col1, _col2, _col3
+ residual filter predicates: {((_col2 > '10') or (_col1 <> '10'))}
Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((_col2 > '10') or (_col1 <> '10')) (type: boolean)
+ Select Operator
+ expressions: _col1 (type: string), _col3 (type: string)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col1 (type: string), _col3 (type: string)
- outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
diff --git ql/src/test/results/clientpositive/ppd_join5.q.out ql/src/test/results/clientpositive/ppd_join5.q.out
index 551769c..fa546c9 100644
--- ql/src/test/results/clientpositive/ppd_join5.q.out
+++ ql/src/test/results/clientpositive/ppd_join5.q.out
@@ -249,21 +249,19 @@ STAGE PLANS:
0
1
outputColumnNames: _col0, _col1, _col3, _col4
+ residual filter predicates: {((_col3 > 1) or (_col4 > 1))}
Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: ((_col3 > 1) or (_col4 > 1)) (type: boolean)
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col3 (type: int), _col4 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col3 (type: int), _col4 (type: int)
- outputColumnNames: _col0, _col1, _col2, _col3
+ File Output Operator
+ compressed: false
Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator